Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 24 Feb 2017 04:33:51 +0000 (20:33 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 24 Feb 2017 04:33:51 +0000 (20:33 -0800)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 24 Feb 2017 04:33:51 +0000 (20:33 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 24 Feb 2017 04:33:51 +0000 (20:33 -0800)
diff --combined fs/debugfs/inode.c

index 7fb1732,1e30f74..7fd4ec4
--- 1/fs/debugfs/inode.c
--- 2/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@@ -187,9 -187,9 +187,9 @@@ static const struct super_operations de
   
   static struct vfsmount *debugfs_automount(struct path *path)
   {
-       struct vfsmount *(*f)(void *);
-       f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
-       return f(d_inode(path->dentry)->i_private);
+       debugfs_automount_t f;
+       f = (debugfs_automount_t)path->dentry->d_fsdata;
+       return f(path->dentry, d_inode(path->dentry)->i_private);
   }
   
   static const struct dentry_operations debugfs_dops = {
@@@ -248,42 -248,6 +248,42 @@@ static struct file_system_type debug_fs
   };
   MODULE_ALIAS_FS("debugfs");
   
+ +/**
+ + * debugfs_lookup() - look up an existing debugfs file
+ + * @name: a pointer to a string containing the name of the file to look up.
+ + *        Must not be %NULL: its length is taken with strlen().
+ + * @parent: a pointer to the parent dentry of the file.  If this is %NULL,
+ + *          the lookup starts at the root of the debugfs mount
+ + *          (debugfs_mount->mnt_root).  If this is an error pointer, no
+ + *          lookup is attempted and %NULL is returned.
+ + *
+ + * The lookup itself runs with the inode lock of the parent directory held;
+ + * the lock is released again before the result is examined.
+ + *
+ + * This function will return a pointer to a dentry if it succeeds.  If the file
+ + * doesn't exist or an error occurs, %NULL will be returned.  The returned
+ + * dentry must be passed to dput() when it is no longer needed.
+ + *
+ + * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ + * returned (encoded as an error pointer rather than %NULL).
+ + */
+ +struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
+ +{
+ +      struct dentry *dentry;
+ +
+ +      if (IS_ERR(parent)) /* caller handed in a failed parent: nothing to search */
+ +              return NULL;
+ +
+ +      if (!parent) /* no parent given: search the debugfs root */
+ +              parent = debugfs_mount->mnt_root;
+ +
+ +      inode_lock(d_inode(parent));
+ +      dentry = lookup_one_len(name, parent, strlen(name));
+ +      inode_unlock(d_inode(parent));
+ +
+ +      if (IS_ERR(dentry)) /* lookup errors are reported as plain NULL */
+ +              return NULL;
+ +      if (!d_really_is_positive(dentry)) { /* no existing file: drop our reference */
+ +              dput(dentry);
+ +              return NULL;
+ +      }
+ +      return dentry; /* reference is handed to the caller, who must dput() it */
+ +}
+ +EXPORT_SYMBOL_GPL(debugfs_lookup);
+ +
   static struct dentry *start_creating(const char *name, struct dentry *parent)
   {
         struct dentry *dentry;
@@@ -540,7 -504,7 +540,7 @@@ EXPORT_SYMBOL_GPL(debugfs_create_dir)
    */
   struct dentry *debugfs_create_automount(const char *name,
                                         struct dentry *parent,
-                                       struct vfsmount *(*f)(void *),
+                                       debugfs_automount_t f,
                                         void *data)
   {
         struct dentry *dentry = start_creating(name, parent);
diff --combined fs/proc/base.c

index 3d773eb,ad98d88..b73b4de
--- 1/fs/proc/base.c
--- 2/fs/proc/base.c
+++ b/fs/proc/base.c
@@@ -1667,12 -1667,63 +1667,63 @@@ const struct inode_operations proc_pid_
   
   /* building an inode */
   
+ void task_dump_owner(struct task_struct *task, mode_t mode,
+                    kuid_t *ruid, kgid_t *rgid)
+ {
+       /* Depending on the state of dumpable compute who should own a
+        * proc file for a task.
+        *
+        * @task: task whose credentials and mm are inspected.
+        * @mode: mode of the proc inode.  Only the exact value
+        *        S_IFDIR|S_IRUGO|S_IXUGO skips the dumpable check below,
+        *        so passing 0 always performs it.
+        * @ruid, @rgid: receive the computed owner; both are always
+        *        written before returning.
+        *
+        * NOTE(review): mode is declared mode_t here, while
+        * proc_pid_make_inode() takes umode_t and callers pass
+        * inode->i_mode -- confirm the type difference is intended.
+        */
+       const struct cred *cred;
+       kuid_t uid;
+       kgid_t gid;
+ 
+       /* Default to the task's effective ownership (read under RCU) */
+       rcu_read_lock();
+       cred = __task_cred(task);
+       uid = cred->euid;
+       gid = cred->egid;
+       rcu_read_unlock();
+ 
+       /*
+        * Before the /proc/pid/status file was created the only way to read
+        * the effective uid of a process was to stat /proc/pid.  Reading
+        * /proc/pid/status is slow enough that procps and other packages
+        * kept stating /proc/pid.  To keep the rules in /proc simple I have
+        * made this apply to all per process world readable and executable
+        * directories.  Inodes with exactly that mode therefore keep the
+        * effective ids chosen above, whatever the dumpable state is.
+        */
+       if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
+               struct mm_struct *mm;
+               task_lock(task);
+               mm = task->mm;
+               /* Make non-dumpable tasks owned by some root: uid/gid 0 of
+                * the mm's user namespace, or the global root ids when
+                * make_kuid()/make_kgid() yield an invalid id.
+                */
+               if (mm) {
+                       if (get_dumpable(mm) != SUID_DUMP_USER) {
+                               struct user_namespace *user_ns = mm->user_ns;
+ 
+                               uid = make_kuid(user_ns, 0);
+                               if (!uid_valid(uid))
+                                       uid = GLOBAL_ROOT_UID;
+ 
+                               gid = make_kgid(user_ns, 0);
+                               if (!gid_valid(gid))
+                                       gid = GLOBAL_ROOT_GID;
+                       }
+               } else { /* task has no mm: owned by the global root ids */
+                       uid = GLOBAL_ROOT_UID;
+                       gid = GLOBAL_ROOT_GID;
+               }
+               task_unlock(task);
+       }
+       *ruid = uid;
+       *rgid = gid;
+ }
+ 
   struct inode *proc_pid_make_inode(struct super_block * sb,
                                   struct task_struct *task, umode_t mode)
   {
         struct inode * inode;
         struct proc_inode *ei;
-       const struct cred *cred;
   
         /* We need a new inode */
   
@@@ -1694,13 -1745,7 +1745,7 @@@
         if (!ei->pid)
                 goto out_unlock;
   
-       if (task_dumpable(task)) {
-               rcu_read_lock();
-               cred = __task_cred(task);
-               inode->i_uid = cred->euid;
-               inode->i_gid = cred->egid;
-               rcu_read_unlock();
-       }
+       task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
         security_task_to_inode(task, inode);
   
   out:
@@@ -1715,7 -1760,6 +1760,6 @@@ int pid_getattr(struct vfsmount *mnt, s
   {
         struct inode *inode = d_inode(dentry);
         struct task_struct *task;
-       const struct cred *cred;
         struct pid_namespace *pid = dentry->d_sb->s_fs_info;
   
         generic_fillattr(inode, stat);
@@@ -1733,12 -1777,7 +1777,7 @@@
                          */
                         return -ENOENT;
                 }
-               if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
-                   task_dumpable(task)) {
-                       cred = __task_cred(task);
-                       stat->uid = cred->euid;
-                       stat->gid = cred->egid;
-               }
+               task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
         }
         rcu_read_unlock();
         return 0;
@@@ -1754,18 -1793,11 +1793,11 @@@
    * Rewrite the inode's ownerships here because the owning task may have
    * performed a setuid(), etc.
    *
-  * Before the /proc/pid/status file was created the only way to read
-  * the effective uid of a /process was to stat /proc/pid.  Reading
-  * /proc/pid/status is slow enough that procps and other packages
-  * kept stating /proc/pid.  To keep the rules in /proc simple I have
-  * made this apply to all per process world readable and executable
-  * directories.
    */
   int pid_revalidate(struct dentry *dentry, unsigned int flags)
   {
         struct inode *inode;
         struct task_struct *task;
-       const struct cred *cred;
   
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
@@@ -1774,17 -1806,8 +1806,8 @@@
         task = get_proc_task(inode);
   
         if (task) {
-               if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
-                   task_dumpable(task)) {
-                       rcu_read_lock();
-                       cred = __task_cred(task);
-                       inode->i_uid = cred->euid;
-                       inode->i_gid = cred->egid;
-                       rcu_read_unlock();
-               } else {
-                       inode->i_uid = GLOBAL_ROOT_UID;
-                       inode->i_gid = GLOBAL_ROOT_GID;
-               }
+               task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+ 
                 inode->i_mode &= ~(S_ISUID | S_ISGID);
                 security_task_to_inode(task, inode);
                 put_task_struct(task);
@@@ -1881,7 -1904,6 +1904,6 @@@ static int map_files_d_revalidate(struc
         bool exact_vma_exists = false;
         struct mm_struct *mm = NULL;
         struct task_struct *task;
-       const struct cred *cred;
         struct inode *inode;
         int status = 0;
   
@@@ -1906,16 -1928,8 +1928,8 @@@
         mmput(mm);
   
         if (exact_vma_exists) {
-               if (task_dumpable(task)) {
-                       rcu_read_lock();
-                       cred = __task_cred(task);
-                       inode->i_uid = cred->euid;
-                       inode->i_gid = cred->egid;
-                       rcu_read_unlock();
-               } else {
-                       inode->i_uid = GLOBAL_ROOT_UID;
-                       inode->i_gid = GLOBAL_ROOT_GID;
-               }
+               task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+ 
                 security_task_to_inode(task, inode);
                 status = 1;
         }
@@@ -2179,7 -2193,7 +2193,7 @@@ static const struct file_operations pro
         .llseek         = generic_file_llseek,
   };
   
- -#ifdef CONFIG_CHECKPOINT_RESTORE
+ +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
   struct timers_private {
         struct pid *pid;
         struct task_struct *task;
@@@ -2488,12 -2502,6 +2502,12 @@@ static ssize_t proc_pid_attr_write(stru
         length = -ESRCH;
         if (!task)
                 goto out_no_task;
+ +
+ +      /* A task may only write its own attributes. */
+ +      length = -EACCES;
+ +      if (current != task)
+ +              goto out;
+ +
         if (count > PAGE_SIZE)
                 count = PAGE_SIZE;
   
@@@ -2509,13 -2517,14 +2523,13 @@@
         }
   
         /* Guard against adverse ptrace interaction */
- -      length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
+ +      length = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
         if (length < 0)
                 goto out_free;
   
- -      length = security_setprocattr(task,
- -                                    (char*)file->f_path.dentry->d_name.name,
+ +      length = security_setprocattr(file->f_path.dentry->d_name.name,
                                       page, count);
- -      mutex_unlock(&task->signal->cred_guard_mutex);
+ +      mutex_unlock(&current->signal->cred_guard_mutex);
   out_free:
         kfree(page);
   out:
@@@ -2941,7 -2950,7 +2955,7 @@@ static const struct pid_entry tgid_base
         REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
         REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
   #endif
- -#ifdef CONFIG_CHECKPOINT_RESTORE
+ +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
         REG("timers",     S_IRUGO, proc_timers_operations),
   #endif
         REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
@@@ -3184,8 -3193,6 +3198,8 @@@ int proc_pid_readdir(struct file *file
              iter.tgid += 1, iter = next_tgid(ns, iter)) {
                 char name[PROC_NUMBUF];
                 int len;
+ +
+ +              cond_resched();
                 if (!has_pid_permissions(ns, iter.task, 2))
                         continue;
   
diff --combined fs/super.c

index ea662b0,4185844..b8b6a08
--- 1/fs/super.c
--- 2/fs/super.c
+++ b/fs/super.c
@@@ -469,7 -469,7 +469,7 @@@ struct super_block *sget_userns(struct 
         struct super_block *old;
         int err;
   
-       if (!(flags & MS_KERNMOUNT) &&
+       if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
             !(type->fs_flags & FS_USERNS_MOUNT) &&
             !capable(CAP_SYS_ADMIN))
                 return ERR_PTR(-EPERM);
@@@ -499,7 -499,7 +499,7 @@@ retry
         }
         if (!s) {
                 spin_unlock(&sb_lock);
-               s = alloc_super(type, flags, user_ns);
+               s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
                 if (!s)
                         return ERR_PTR(-ENOMEM);
                 goto retry;
@@@ -540,8 -540,15 +540,15 @@@ struct super_block *sget(struct file_sy
   {
         struct user_namespace *user_ns = current_user_ns();
   
+       /* We don't yet pass the user namespace of the parent
+        * mount through to here so always use &init_user_ns
+        * until that changes.
+        */
+       if (flags & MS_SUBMOUNT)
+               user_ns = &init_user_ns;
+ 
         /* Ensure the requestor has permissions over the target filesystem */
-       if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+       if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
                 return ERR_PTR(-EPERM);
   
         return sget_userns(type, test, set, flags, user_ns, data);
@@@ -1047,7 -1054,7 +1054,7 @@@ static int set_bdev_super(struct super_
          * We set the bdi here to the queue backing, file systems can
          * overwrite this in ->fill_super()
          */
- -      s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
+ +      s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
         return 0;
   }
   
diff --combined include/linux/debugfs.h

index 9d571ac,233006b..7dff776
--- 1/include/linux/debugfs.h
--- 2/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@@ -52,7 -52,8 +52,7 @@@ extern struct srcu_struct debugfs_srcu
    * Must only be called under the protection established by
    * debugfs_use_file_start().
    */
- -static inline const struct file_operations *
- -debugfs_real_fops(const struct file *filp)
+ +static inline const struct file_operations *debugfs_real_fops(const struct file *filp)
         __must_hold(&debugfs_srcu)
   {
         /*
@@@ -79,8 -80,6 +79,8 @@@ static const struct file_operations __f
   
   #if defined(CONFIG_DEBUG_FS)
   
+ +struct dentry *debugfs_lookup(const char *name, struct dentry *parent);
+ +
   struct dentry *debugfs_create_file(const char *name, umode_t mode,
                                    struct dentry *parent, void *data,
                                    const struct file_operations *fops);
@@@ -98,9 -97,10 +98,10 @@@ struct dentry *debugfs_create_dir(cons
   struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
                                       const char *dest);
   
+ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
   struct dentry *debugfs_create_automount(const char *name,
                                         struct dentry *parent,
-                                       struct vfsmount *(*f)(void *),
+                                       debugfs_automount_t f,
                                         void *data);
   
   void debugfs_remove(struct dentry *dentry);
@@@ -182,12 -182,6 +183,12 @@@ ssize_t debugfs_write_file_bool(struct 
    * want to duplicate the design decision mistakes of procfs and devfs again.
    */
   
+ +static inline struct dentry *debugfs_lookup(const char *name,
+ +                                          struct dentry *parent)
+ +{      /* inline fallback: performs no lookup and ignores both arguments */
+ +      return ERR_PTR(-ENODEV); /* always an error pointer, never a dentry or NULL */
+ +}
+ +
   static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
                                         struct dentry *parent, void *data,
                                         const struct file_operations *fops)
diff --combined include/linux/fsnotify_backend.h

index 4872465,c8f2738..e6e689b
--- 1/include/linux/fsnotify_backend.h
--- 2/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@@ -16,6 -16,7 +16,7 @@@
   #include <linux/spinlock.h>
   #include <linux/types.h>
   #include <linux/atomic.h>
+ #include <linux/user_namespace.h>
   
   /*
    * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@@ -170,7 -171,7 +171,7 @@@ struct fsnotify_group 
                 struct inotify_group_private_data {
                         spinlock_t      idr_lock;
                         struct idr      idr;
-                       struct user_struct      *user;
+                       struct ucounts *ucounts;
                 } inotify_data;
   #endif
   #ifdef CONFIG_FANOTIFY
@@@ -323,6 -324,8 +324,6 @@@ extern void fsnotify_init_mark(struct f
   extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode);
   /* find (and take a reference) to a mark associated with group and vfsmount */
   extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt);
- -/* copy the values from old into new */
- -extern void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old);
   /* set the ignored_mask of a mark */
   extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask);
   /* set the mask of a mark (might pin the object into memory */
diff --combined include/linux/sched.h

index c8e519d,6261bfc..451e241
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -29,6 -29,7 +29,6 @@@ struct sched_param 
   
   #include <asm/page.h>
   #include <asm/ptrace.h>
- -#include <linux/cputime.h>
   
   #include <linux/smp.h>
   #include <linux/sem.h>
@@@ -226,7 -227,7 +226,7 @@@ extern void proc_sched_set_task(struct 
   extern char ___assert_task_state[1 - 2*!!(
                 sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
   
- -/* Convenience macros for the sake of set_task_state */
+ +/* Convenience macros for the sake of set_current_state */
   #define TASK_KILLABLE         (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
   #define TASK_STOPPED          (TASK_WAKEKILL | __TASK_STOPPED)
   #define TASK_TRACED           (TASK_WAKEKILL | __TASK_TRACED)
@@@ -253,6 -254,17 +253,6 @@@
   
   #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
   
- -#define __set_task_state(tsk, state_value)                    \
- -      do {                                                    \
- -              (tsk)->task_state_change = _THIS_IP_;           \
- -              (tsk)->state = (state_value);                   \
- -      } while (0)
- -#define set_task_state(tsk, state_value)                      \
- -      do {                                                    \
- -              (tsk)->task_state_change = _THIS_IP_;           \
- -              smp_store_mb((tsk)->state, (state_value));      \
- -      } while (0)
- -
   #define __set_current_state(state_value)                      \
         do {                                                    \
                 current->task_state_change = _THIS_IP_;         \
@@@ -265,6 -277,20 +265,6 @@@
         } while (0)
   
   #else
- -
- -/*
- - * @tsk had better be current, or you get to keep the pieces.
- - *
- - * The only reason is that computing current can be more expensive than
- - * using a pointer that's already available.
- - *
- - * Therefore, see set_current_state().
- - */
- -#define __set_task_state(tsk, state_value)            \
- -      do { (tsk)->state = (state_value); } while (0)
- -#define set_task_state(tsk, state_value)              \
- -      smp_store_mb((tsk)->state, (state_value))
- -
   /*
    * set_current_state() includes a barrier so that the write of current->state
    * is correctly serialised wrt the caller's subsequent test of whether to
@@@ -435,10 -461,12 +435,10 @@@ extern signed long schedule_timeout_idl
   asmlinkage void schedule(void);
   extern void schedule_preempt_disabled(void);
   
+ +extern int __must_check io_schedule_prepare(void);
+ +extern void io_schedule_finish(int token);
   extern long io_schedule_timeout(long timeout);
- -
- -static inline void io_schedule(void)
- -{
- -      io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
- -}
+ +extern void io_schedule(void);
   
   void __noreturn do_task_dead(void);
   
@@@ -537,13 -565,15 +537,13 @@@ struct pacct_struct 
         int                     ac_flag;
         long                    ac_exitcode;
         unsigned long           ac_mem;
- -      cputime_t               ac_utime, ac_stime;
+ +      u64                     ac_utime, ac_stime;
         unsigned long           ac_minflt, ac_majflt;
   };
   
   struct cpu_itimer {
- -      cputime_t expires;
- -      cputime_t incr;
- -      u32 error;
- -      u32 incr_error;
+ +      u64 expires;
+ +      u64 incr;
   };
   
   /**
@@@ -557,8 -587,8 +557,8 @@@
    */
   struct prev_cputime {
   #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- -      cputime_t utime;
- -      cputime_t stime;
+ +      u64 utime;
+ +      u64 stime;
         raw_spinlock_t lock;
   #endif
   };
@@@ -573,8 -603,8 +573,8 @@@ static inline void prev_cputime_init(st
   
   /**
    * struct task_cputime - collected CPU time counts
- - * @utime:            time spent in user mode, in &cputime_t units
- - * @stime:            time spent in kernel mode, in &cputime_t units
+ + * @utime:            time spent in user mode, in nanoseconds
+ + * @stime:            time spent in kernel mode, in nanoseconds
    * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
    *
    * This structure groups together three kinds of CPU time that are tracked for
@@@ -582,8 -612,8 +582,8 @@@
    * these counts together and treat all three of them in parallel.
    */
   struct task_cputime {
- -      cputime_t utime;
- -      cputime_t stime;
+ +      u64 utime;
+ +      u64 stime;
         unsigned long long sum_exec_runtime;
   };
   
@@@ -592,6 -622,13 +592,6 @@@
   #define prof_exp      stime
   #define sched_exp     sum_exec_runtime
   
- -#define INIT_CPUTIME  \
- -      (struct task_cputime) {                                 \
- -              .utime = 0,                                     \
- -              .stime = 0,                                     \
- -              .sum_exec_runtime = 0,                          \
- -      }
- -
   /*
    * This is the atomic variant of task_cputime, which can be used for
    * storing and updating task_cputime statistics without locking.
@@@ -697,14 -734,13 +697,14 @@@ struct signal_struct 
         unsigned int            is_child_subreaper:1;
         unsigned int            has_child_subreaper:1;
   
+ +#ifdef CONFIG_POSIX_TIMERS
+ +
         /* POSIX.1b Interval Timers */
         int                     posix_timer_id;
         struct list_head        posix_timers;
   
         /* ITIMER_REAL timer for the process */
         struct hrtimer real_timer;
- -      struct pid *leader_pid;
         ktime_t it_real_incr;
   
         /*
@@@ -723,16 -759,12 +723,16 @@@
         /* Earliest-expiration cache. */
         struct task_cputime cputime_expires;
   
+ +      struct list_head cpu_timers[3];
+ +
+ +#endif
+ +
+ +      struct pid *leader_pid;
+ +
   #ifdef CONFIG_NO_HZ_FULL
         atomic_t tick_dep_mask;
   #endif
   
- -      struct list_head cpu_timers[3];
- -
         struct pid *tty_old_pgrp;
   
         /* boolean value for session group leader */
@@@ -750,9 -782,9 +750,9 @@@
          * in __exit_signal, except for the group leader.
          */
         seqlock_t stats_lock;
- -      cputime_t utime, stime, cutime, cstime;
- -      cputime_t gtime;
- -      cputime_t cgtime;
+ +      u64 utime, stime, cutime, cstime;
+ +      u64 gtime;
+ +      u64 cgtime;
         struct prev_cputime prev_cputime;
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
         unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
@@@ -822,16 -854,6 +822,16 @@@
   
   #define SIGNAL_UNKILLABLE     0x00000040 /* for init: ignore fatal signals */
   
+ +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
+ +                        SIGNAL_STOP_CONTINUED)
+ +
+ +static inline void signal_set_stop_flags(struct signal_struct *sig,
+ +                                       unsigned int flags)
+ +{      /* replace the SIGNAL_STOP_MASK bits of sig->flags with @flags */
+ +      WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); /* warn if a group exit or coredump is flagged; the update below still happens */
+ +      sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; /* bits outside the mask are kept as they were */
+ +}
+ +
   /* If true, all threads except ->group_exit_task have pending SIGKILL */
   static inline int signal_group_exit(const struct signal_struct *sig)
   {
@@@ -846,10 -868,6 +846,6 @@@ struct user_struct 
         atomic_t __count;       /* reference count */
         atomic_t processes;     /* How many processes does this user have? */
         atomic_t sigpending;    /* How many pending signals does this user have? */
- #ifdef CONFIG_INOTIFY_USER
-       atomic_t inotify_watches; /* How many inotify watches does this user have? */
-       atomic_t inotify_devs;  /* How many inotify devs does this user have opened? */
- #endif
   #ifdef CONFIG_FANOTIFY
         atomic_t fanotify_listeners;
   #endif
@@@ -993,8 -1011,8 +989,8 @@@ enum cpu_idle_type 
    *
    * The DEFINE_WAKE_Q macro declares and initializes the list head.
    * wake_up_q() does NOT reinitialize the list; it's expected to be
- - * called near the end of a function, where the fact that the queue is
- - * not used again will be easy to see by inspection.
+ + * called near the end of a function. Otherwise, the list can be
+ + * re-initialized for later re-use by wake_q_init().
    *
    * Note that this can cause spurious wakeups. schedule() callers
    * must ensure the call is done inside a loop, confirming that the
@@@ -1014,12 -1032,6 +1010,12 @@@ struct wake_q_head 
   #define DEFINE_WAKE_Q(name)                           \
         struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
   
+ +static inline void wake_q_init(struct wake_q_head *head)
+ +{      /* (re)initialize @head with the same two values the DEFINE_WAKE_Q() initializer uses */
+ +      head->first = WAKE_Q_TAIL;
+ +      head->lastp = &head->first; /* points back at the head's own first field */
+ +}
+ +
   extern void wake_q_add(struct wake_q_head *head,
                        struct task_struct *task);
   extern void wake_up_q(struct wake_q_head *head);
@@@ -1637,11 -1649,11 +1633,11 @@@ struct task_struct 
         int __user *set_child_tid;              /* CLONE_CHILD_SETTID */
         int __user *clear_child_tid;            /* CLONE_CHILD_CLEARTID */
   
- -      cputime_t utime, stime;
+ +      u64 utime, stime;
   #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
- -      cputime_t utimescaled, stimescaled;
+ +      u64 utimescaled, stimescaled;
   #endif
- -      cputime_t gtime;
+ +      u64 gtime;
         struct prev_cputime prev_cputime;
   #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
         seqcount_t vtime_seqcount;
@@@ -1665,10 -1677,8 +1661,10 @@@
   /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
         unsigned long min_flt, maj_flt;
   
+ +#ifdef CONFIG_POSIX_TIMERS
         struct task_cputime cputime_expires;
         struct list_head cpu_timers[3];
+ +#endif
   
   /* process credentials */
         const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
@@@ -1793,7 -1803,7 +1789,7 @@@
   #if defined(CONFIG_TASK_XACCT)
         u64 acct_rss_mem1;      /* accumulated rss usage */
         u64 acct_vm_mem1;       /* accumulated virtual memory usage */
- -      cputime_t acct_timexpd; /* stime + utime since last update */
+ +      u64 acct_timexpd;       /* stime + utime since last update */
   #endif
   #ifdef CONFIG_CPUSETS
         nodemask_t mems_allowed;        /* Protected by alloc_lock */
@@@ -2238,17 -2248,17 +2234,17 @@@ struct task_struct *try_get_task_struct
   
   #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
   extern void task_cputime(struct task_struct *t,
- -                       cputime_t *utime, cputime_t *stime);
- -extern cputime_t task_gtime(struct task_struct *t);
+ +                       u64 *utime, u64 *stime);
+ +extern u64 task_gtime(struct task_struct *t);
   #else
   static inline void task_cputime(struct task_struct *t,
- -                              cputime_t *utime, cputime_t *stime)
+ +                              u64 *utime, u64 *stime)
   {
         *utime = t->utime;
         *stime = t->stime;
   }
   
- -static inline cputime_t task_gtime(struct task_struct *t)
+ +static inline u64 task_gtime(struct task_struct *t)
   {
         return t->gtime;
   }
@@@ -2256,23 -2266,23 +2252,23 @@@
   
   #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
   static inline void task_cputime_scaled(struct task_struct *t,
- -                                     cputime_t *utimescaled,
- -                                     cputime_t *stimescaled)
+ +                                     u64 *utimescaled,
+ +                                     u64 *stimescaled)
   {
         *utimescaled = t->utimescaled;
         *stimescaled = t->stimescaled;
   }
   #else
   static inline void task_cputime_scaled(struct task_struct *t,
- -                                     cputime_t *utimescaled,
- -                                     cputime_t *stimescaled)
+ +                                     u64 *utimescaled,
+ +                                     u64 *stimescaled)
   {
         task_cputime(t, utimescaled, stimescaled);
   }
   #endif
   
- -extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
- -extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+ +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
+ +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
   
   /*
    * Per process flags
@@@ -2491,18 -2501,10 +2487,18 @@@ extern u64 sched_clock_cpu(int cpu)
   extern void sched_clock_init(void);
   
   #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+ +static inline void sched_clock_init_late(void)
+ +{
+ +}
+ +
   static inline void sched_clock_tick(void)
   {
   }
   
+ +static inline void clear_sched_clock_stable(void)
+ +{
+ +}
+ +
   static inline void sched_clock_idle_sleep_event(void)
   {
   }
@@@ -2521,7 -2523,6 +2517,7 @@@ static inline u64 local_clock(void
         return sched_clock();
   }
   #else
+ +extern void sched_clock_init_late(void);
   /*
    * Architectures can set this to 1 if they have specified
    * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
@@@ -2529,6 -2530,7 +2525,6 @@@
    * is reliable after all:
    */
   extern int sched_clock_stable(void);
- -extern void set_sched_clock_stable(void);
   extern void clear_sched_clock_stable(void);
   
   extern void sched_clock_tick(void);
@@@ -3051,6 -3053,9 +3047,9 @@@ extern bool current_is_single_threaded(
   #define for_each_process_thread(p, t) \
         for_each_process(p) for_each_thread(p, t)
   
+ typedef int (*proc_visitor)(struct task_struct *p, void *data);
+ void walk_process_tree(struct task_struct *top, proc_visitor, void *);
+ 
   static inline int get_nr_threads(struct task_struct *tsk)
   {
         return tsk->signal->nr_threads;
diff --combined include/linux/security.h

index d3868f2,9d9ee90..96899fa
--- 1/include/linux/security.h
--- 2/include/linux/security.h
+++ b/include/linux/security.h
@@@ -140,8 -140,7 +140,7 @@@ struct request_sock
   /* bprm->unsafe reasons */
   #define LSM_UNSAFE_SHARE      1
   #define LSM_UNSAFE_PTRACE     2
- #define LSM_UNSAFE_PTRACE_CAP 4
- #define LSM_UNSAFE_NO_NEW_PRIVS       8
+ #define LSM_UNSAFE_NO_NEW_PRIVS       4
   
   #ifdef CONFIG_MMU
   extern int mmap_min_addr_handler(struct ctl_table *table, int write,
@@@ -332,6 -331,7 +331,6 @@@ int security_task_getscheduler(struct t
   int security_task_movememory(struct task_struct *p);
   int security_task_kill(struct task_struct *p, struct siginfo *info,
                         int sig, u32 secid);
- -int security_task_wait(struct task_struct *p);
   int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
                         unsigned long arg4, unsigned long arg5);
   void security_task_to_inode(struct task_struct *p, struct inode *inode);
@@@ -360,7 -360,7 +359,7 @@@ int security_sem_semop(struct sem_arra
                         unsigned nsops, int alter);
   void security_d_instantiate(struct dentry *dentry, struct inode *inode);
   int security_getprocattr(struct task_struct *p, char *name, char **value);
- -int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size);
+ +int security_setprocattr(const char *name, void *value, size_t size);
   int security_netlink_send(struct sock *sk, struct sk_buff *skb);
   int security_ismaclabel(const char *name);
   int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
@@@ -979,6 -979,11 +978,6 @@@ static inline int security_task_kill(st
         return 0;
   }
   
- -static inline int security_task_wait(struct task_struct *p)
- -{
- -      return 0;
- -}
- -
   static inline int security_task_prctl(int option, unsigned long arg2,
                                       unsigned long arg3,
                                       unsigned long arg4,
@@@ -1100,7 -1105,7 +1099,7 @@@ static inline int security_getprocattr(
         return -EINVAL;
   }
   
- -static inline int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
+ +static inline int security_setprocattr(const char *name, void *value, size_t size)
   {
         return -EINVAL;
   }
diff --combined kernel/exit.c

index 580da79,5cfbd59..9960acc
--- 1/kernel/exit.c
--- 2/kernel/exit.c
+++ b/kernel/exit.c
@@@ -14,6 -14,7 +14,6 @@@
   #include <linux/tty.h>
   #include <linux/iocontext.h>
   #include <linux/key.h>
- -#include <linux/security.h>
   #include <linux/cpu.h>
   #include <linux/acct.h>
   #include <linux/tsacct_kern.h>
@@@ -54,7 -55,6 +54,7 @@@
   #include <linux/shm.h>
   #include <linux/kcov.h>
   #include <linux/random.h>
+ +#include <linux/rcuwait.h>
   
   #include <linux/uaccess.h>
   #include <asm/unistd.h>
@@@ -86,7 -86,7 +86,7 @@@ static void __exit_signal(struct task_s
         bool group_dead = thread_group_leader(tsk);
         struct sighand_struct *sighand;
         struct tty_struct *uninitialized_var(tty);
- -      cputime_t utime, stime;
+ +      u64 utime, stime;
   
         sighand = rcu_dereference_check(tsk->sighand,
                                         lockdep_tasklist_lock_is_held());
@@@ -282,35 -282,6 +282,35 @@@ retry
         return task;
   }
   
+ +void rcuwait_wake_up(struct rcuwait *w)
+ +{
+ +      struct task_struct *task;
+ +
+ +      rcu_read_lock();
+ +
+ +      /*
+ +       * Order condition vs @task, such that everything prior to the load
+ +       * of @task is visible. This is the condition as to why the user called
+ +       * rcuwait_wake_up() in the first place. Pairs with set_current_state()
+ +       * barrier (A) in rcuwait_wait_event().
+ +       *
+ +       *    WAIT                WAKE
+ +       *    [S] tsk = current   [S] cond = true
+ +       *        MB (A)              MB (B)
+ +       *    [L] cond            [L] tsk
+ +       */
+ +      smp_mb(); /* (B): store->load ordering needs a full barrier; smp_rmb() only orders loads */
+ +
+ +      /*
+ +       * Avoid using task_rcu_dereference() magic as long as we are careful,
+ +       * see comment in rcuwait_wait_event() regarding ->exit_state.
+ +       */
+ +      task = rcu_dereference(w->task);
+ +      if (task)
+ +              wake_up_process(task);
+ +      rcu_read_unlock();
+ +}
+ +
   struct task_struct *try_get_task_struct(struct task_struct **ptask)
   {
         struct task_struct *task;
@@@ -497,12 -468,12 +497,12 @@@ assign_new_owner
    * Turn us into a lazy TLB process if we
    * aren't already..
    */
- -static void exit_mm(struct task_struct *tsk)
+ +static void exit_mm(void)
   {
- -      struct mm_struct *mm = tsk->mm;
+ +      struct mm_struct *mm = current->mm;
         struct core_state *core_state;
   
- -      mm_release(tsk, mm);
+ +      mm_release(current, mm);
         if (!mm)
                 return;
         sync_mm_rss(mm);
@@@ -520,7 -491,7 +520,7 @@@
   
                 up_read(&mm->mmap_sem);
   
- -              self.task = tsk;
+ +              self.task = current;
                 self.next = xchg(&core_state->dumper.next, &self);
                 /*
                  * Implies mb(), the result of xchg() must be visible
@@@ -530,22 -501,22 +530,22 @@@
                         complete(&core_state->startup);
   
                 for (;;) {
- -                      set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ +                      set_current_state(TASK_UNINTERRUPTIBLE);
                         if (!self.task) /* see coredump_finish() */
                                 break;
                         freezable_schedule();
                 }
- -              __set_task_state(tsk, TASK_RUNNING);
+ +              __set_current_state(TASK_RUNNING);
                 down_read(&mm->mmap_sem);
         }
         atomic_inc(&mm->mm_count);
- -      BUG_ON(mm != tsk->active_mm);
+ +      BUG_ON(mm != current->active_mm);
         /* more a memory barrier than a real lock */
- -      task_lock(tsk);
- -      tsk->mm = NULL;
+ +      task_lock(current);
+ +      current->mm = NULL;
         up_read(&mm->mmap_sem);
         enter_lazy_tlb(mm, current);
- -      task_unlock(tsk);
+ +      task_unlock(current);
         mm_update_next_owner(mm);
         mmput(mm);
         if (test_thread_flag(TIF_MEMDIE))
@@@ -607,15 -578,18 +607,18 @@@ static struct task_struct *find_new_rea
                 return thread;
   
         if (father->signal->has_child_subreaper) {
+               unsigned int ns_level = task_pid(father)->level;
                 /*
                  * Find the first ->is_child_subreaper ancestor in our pid_ns.
-                * We start from father to ensure we can not look into another
-                * namespace, this is safe because all its threads are dead.
+                * We can't check reaper != child_reaper to ensure we do not
+                * cross the namespaces, the exiting parent could be injected
+                * by setns() + fork().
+                * We check pid->level, this is slightly more efficient than
+                * task_active_pid_ns(reaper) != task_active_pid_ns(father).
                  */
-               for (reaper = father;
-                    !same_thread_group(reaper, child_reaper);
+               for (reaper = father->real_parent;
+                    task_pid(reaper)->level == ns_level;
                      reaper = reaper->real_parent) {
-                       /* call_usermodehelper() descendants need this check */
                         if (reaper == &init_task)
                                 break;
                         if (!reaper->signal->is_child_subreaper)
@@@ -852,7 -826,7 +855,7 @@@ void __noreturn do_exit(long code
         tsk->exit_code = code;
         taskstats_exit(tsk, group_dead);
   
- -      exit_mm(tsk);
+ +      exit_mm();
   
         if (group_dead)
                 acct_process();
@@@ -1120,7 -1094,7 +1123,7 @@@ static int wait_task_zombie(struct wait
                 struct signal_struct *sig = p->signal;
                 struct signal_struct *psig = current->signal;
                 unsigned long maxrss;
- -              cputime_t tgutime, tgstime;
+ +              u64 tgutime, tgstime;
   
                 /*
                  * The resource counters for the group leader are in its
@@@ -1389,7 -1363,7 +1392,7 @@@ static int wait_task_continued(struct w
    * Returns nonzero for a final return, when we have unlocked tasklist_lock.
    * Returns zero if the search for a child should continue;
    * then ->notask_error is 0 if @p is an eligible child,
- - * or another error from security_task_wait(), or still -ECHILD.
+ + * or still -ECHILD.
    */
   static int wait_consider_task(struct wait_opts *wo, int ptrace,
                                 struct task_struct *p)
@@@ -1409,6 -1383,20 +1412,6 @@@
         if (!ret)
                 return ret;
   
- -      ret = security_task_wait(p);
- -      if (unlikely(ret < 0)) {
- -              /*
- -               * If we have not yet seen any eligible child,
- -               * then let this error code replace -ECHILD.
- -               * A permission error will give the user a clue
- -               * to look for security policy problems, rather
- -               * than for mysterious wait bugs.
- -               */
- -              if (wo->notask_error)
- -                      wo->notask_error = ret;
- -              return 0;
- -      }
- -
         if (unlikely(exit_state == EXIT_TRACE)) {
                 /*
                  * ptrace == 0 means we are the natural parent. In this case
@@@ -1501,7 -1489,7 +1504,7 @@@
    * Returns nonzero for a final return, when we have unlocked tasklist_lock.
    * Returns zero if the search for a child should continue; then
    * ->notask_error is 0 if there were any eligible children,
- - * or another error from security_task_wait(), or still -ECHILD.
+ + * or still -ECHILD.
    */
   static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
   {
diff --combined kernel/fork.c

index d12fcc4,c814e59..348fe73
--- 1/kernel/fork.c
--- 2/kernel/fork.c
+++ b/kernel/fork.c
@@@ -55,7 -55,6 +55,7 @@@
   #include <linux/rmap.h>
   #include <linux/ksm.h>
   #include <linux/acct.h>
+ +#include <linux/userfaultfd_k.h>
   #include <linux/tsacct_kern.h>
   #include <linux/cn_proc.h>
   #include <linux/freezer.h>
@@@ -433,13 -432,11 +433,13 @@@ void __init fork_init(void
         int i;
   #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
   #ifndef ARCH_MIN_TASKALIGN
- -#define ARCH_MIN_TASKALIGN    L1_CACHE_BYTES
+ +#define ARCH_MIN_TASKALIGN    0
   #endif
+ +      int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
+ +
         /* create a slab on which task_structs can be allocated */
         task_struct_cachep = kmem_cache_create("task_struct",
- -                      arch_task_struct_size, ARCH_MIN_TASKALIGN,
+ +                      arch_task_struct_size, align,
                         SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
   #endif
   
@@@ -562,7 -559,6 +562,7 @@@ static __latent_entropy int dup_mmap(st
         struct rb_node **rb_link, *rb_parent;
         int retval;
         unsigned long charge;
+ +      LIST_HEAD(uf);
   
         uprobe_start_dup_mmap();
         if (down_write_killable(&oldmm->mmap_sem)) {
@@@ -619,13 -615,12 +619,13 @@@
                 if (retval)
                         goto fail_nomem_policy;
                 tmp->vm_mm = mm;
+ +              retval = dup_userfaultfd(tmp, &uf);
+ +              if (retval)
+ +                      goto fail_nomem_anon_vma_fork;
                 if (anon_vma_fork(tmp, mpnt))
                         goto fail_nomem_anon_vma_fork;
- -              tmp->vm_flags &=
- -                      ~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP);
+ +              tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
                 tmp->vm_next = tmp->vm_prev = NULL;
- -              tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
                 file = tmp->vm_file;
                 if (file) {
                         struct inode *inode = file_inode(file);
@@@ -681,7 -676,6 +681,7 @@@ out
         up_write(&mm->mmap_sem);
         flush_tlb_mm(oldmm);
         up_write(&oldmm->mmap_sem);
+ +      dup_userfaultfd_complete(&uf);
   fail_uprobe_end:
         uprobe_end_dup_mmap();
         return retval;
@@@ -1310,7 -1304,6 +1310,7 @@@ void __cleanup_sighand(struct sighand_s
         }
   }
   
+ +#ifdef CONFIG_POSIX_TIMERS
   /*
    * Initialize POSIX timer handling for a thread group.
    */
@@@ -1320,7 -1313,7 +1320,7 @@@ static void posix_cpu_timers_init_group
   
         cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
         if (cpu_limit != RLIM_INFINITY) {
- -              sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
+ +              sig->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC;
                 sig->cputimer.running = true;
         }
   
@@@ -1329,9 -1322,6 +1329,9 @@@
         INIT_LIST_HEAD(&sig->cpu_timers[1]);
         INIT_LIST_HEAD(&sig->cpu_timers[2]);
   }
+ +#else
+ +static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { }
+ +#endif
   
   static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
   {
@@@ -1356,11 -1346,11 +1356,11 @@@
         init_waitqueue_head(&sig->wait_chldexit);
         sig->curr_target = tsk;
         init_sigpending(&sig->shared_pending);
- -      INIT_LIST_HEAD(&sig->posix_timers);
         seqlock_init(&sig->stats_lock);
         prev_cputime_init(&sig->prev_cputime);
   
   #ifdef CONFIG_POSIX_TIMERS
+ +      INIT_LIST_HEAD(&sig->posix_timers);
         hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         sig->real_timer.function = it_real_fn;
   #endif
@@@ -1377,9 -1367,6 +1377,6 @@@
         sig->oom_score_adj = current->signal->oom_score_adj;
         sig->oom_score_adj_min = current->signal->oom_score_adj_min;
   
-       sig->has_child_subreaper = current->signal->has_child_subreaper ||
-                                  current->signal->is_child_subreaper;
- 
         mutex_init(&sig->cred_guard_mutex);
   
         return 0;
@@@ -1435,7 -1422,6 +1432,7 @@@ static void rt_mutex_init_task(struct t
   #endif
   }
   
+ +#ifdef CONFIG_POSIX_TIMERS
   /*
    * Initialize POSIX timer handling for a single task.
    */
@@@ -1448,9 -1434,6 +1445,9 @@@ static void posix_cpu_timers_init(struc
         INIT_LIST_HEAD(&tsk->cpu_timers[1]);
         INIT_LIST_HEAD(&tsk->cpu_timers[2]);
   }
+ +#else
+ +static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
+ +#endif
   
   static inline void
   init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
@@@ -1814,6 -1797,13 +1811,13 @@@ static __latent_entropy struct task_str
   
                         p->signal->leader_pid = pid;
                         p->signal->tty = tty_kref_get(current->signal->tty);
+                       /*
+                        * Inherit has_child_subreaper flag under the same
+                        * tasklist_lock with adding child to the process tree
+                        * for propagate_has_child_subreaper optimization.
+                        */
+                       p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
+                                                        p->real_parent->signal->is_child_subreaper;
                         list_add_tail(&p->sibling, &p->real_parent->children);
                         list_add_tail_rcu(&p->tasks, &init_task.tasks);
                         attach_pid(p, PIDTYPE_PGID);
@@@ -2067,6 -2057,38 +2071,38 @@@ SYSCALL_DEFINE5(clone, unsigned long, c
   }
   #endif
   
+ void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data)
+ {
+       struct task_struct *leader, *parent, *child;
+       int res;
+ 
+       read_lock(&tasklist_lock); /* held for the whole walk, dropped at "out" */
+       leader = top = top->group_leader; /* start from top's thread-group leader */
+ down: /* visit the children of every thread in leader's thread group */
+       for_each_thread(leader, parent) {
+               list_for_each_entry(child, &parent->children, sibling) {
+                       res = visitor(child, data);
+                       if (res) { /* nonzero: abort (<0) or descend (>0); zero: next sibling */
+                               if (res < 0)
+                                       goto out; /* negative result aborts the whole walk */
+                               leader = child; /* positive result: walk child's children next */
+                               goto down;
+                       }
+ up: /* re-entry point when climbing back out of a finished subtree */
+                       ;
+               }
+       }
+ 
+       if (leader != top) { /* a subtree below top is done: climb one level */
+               child = leader;
+               parent = child->real_parent;
+               leader = parent->group_leader;
+               goto up; /* jumps into the loops: continue with child's next sibling */
+       }
+ out:
+       read_unlock(&tasklist_lock);
+ }
+ 
   #ifndef ARCH_MIN_MMSTRUCT_ALIGN
   #define ARCH_MIN_MMSTRUCT_ALIGN 0
   #endif
diff --combined kernel/sys.c

index 7d4a9a6,0e4d566..b07adca
--- 1/kernel/sys.c
--- 2/kernel/sys.c
+++ b/kernel/sys.c
@@@ -881,15 -881,15 +881,15 @@@ SYSCALL_DEFINE0(getegid
   
   void do_sys_times(struct tms *tms)
   {
- -      cputime_t tgutime, tgstime, cutime, cstime;
+ +      u64 tgutime, tgstime, cutime, cstime;
   
         thread_group_cputime_adjusted(current, &tgutime, &tgstime);
         cutime = current->signal->cutime;
         cstime = current->signal->cstime;
- -      tms->tms_utime = cputime_to_clock_t(tgutime);
- -      tms->tms_stime = cputime_to_clock_t(tgstime);
- -      tms->tms_cutime = cputime_to_clock_t(cutime);
- -      tms->tms_cstime = cputime_to_clock_t(cstime);
+ +      tms->tms_utime = nsec_to_clock_t(tgutime);
+ +      tms->tms_stime = nsec_to_clock_t(tgstime);
+ +      tms->tms_cutime = nsec_to_clock_t(cutime);
+ +      tms->tms_cstime = nsec_to_clock_t(cstime);
   }
   
   SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
@@@ -1544,7 -1544,7 +1544,7 @@@ static void k_getrusage(struct task_str
   {
         struct task_struct *t;
         unsigned long flags;
- -      cputime_t tgutime, tgstime, utime, stime;
+ +      u64 tgutime, tgstime, utime, stime;
         unsigned long maxrss = 0;
   
         memset((char *)r, 0, sizeof (*r));
@@@ -1600,8 -1600,8 +1600,8 @@@
         unlock_task_sighand(p, &flags);
   
   out:
- -      cputime_to_timeval(utime, &r->ru_utime);
- -      cputime_to_timeval(stime, &r->ru_stime);
+ +      r->ru_utime = ns_to_timeval(utime);
+ +      r->ru_stime = ns_to_timeval(stime);
   
         if (who != RUSAGE_CHILDREN) {
                 struct mm_struct *mm = get_task_mm(p);
@@@ -2063,6 -2063,24 +2063,24 @@@ static int prctl_get_tid_address(struc
   }
   #endif
   
+ static int propagate_has_child_subreaper(struct task_struct *p, void *data)
+ {
+       /*
+        * If task has has_child_subreaper - all its descendants
+        * already have this flag too and new descendants will
+        * inherit it on fork, skip them.
+        *
+        * If we've found child_reaper - skip descendants in
+        * its subtree as they will never get out of the pidns.
+        */
+       if (p->signal->has_child_subreaper ||
+           is_child_reaper(task_pid(p)))
+               return 0; /* 0: walk_process_tree() does not descend below p */
+ 
+       p->signal->has_child_subreaper = 1;
+       return 1; /* positive: walk_process_tree() continues into p's children */
+ }
+ 
   SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                 unsigned long, arg4, unsigned long, arg5)
   {
@@@ -2214,6 -2232,10 +2232,10 @@@
                 break;
         case PR_SET_CHILD_SUBREAPER:
                 me->signal->is_child_subreaper = !!arg2;
+               if (!arg2)
+                       break;
+ 
+               walk_process_tree(me, propagate_has_child_subreaper, NULL);
                 break;
         case PR_GET_CHILD_SUBREAPER:
                 error = put_user(me->signal->is_child_subreaper,
diff --combined kernel/ucount.c

index 95c6336,6871640..8a11fc0
--- 1/kernel/ucount.c
--- 2/kernel/ucount.c
+++ b/kernel/ucount.c
@@@ -57,7 -57,7 +57,7 @@@ static struct ctl_table_root set_root 
   
   static int zero = 0;
   static int int_max = INT_MAX;
- #define UCOUNT_ENTRY(name)                            \
+ #define UCOUNT_ENTRY(name)                            \
         {                                               \
                 .procname       = name,                 \
                 .maxlen         = sizeof(int),          \
@@@ -74,6 -74,10 +74,10 @@@ static struct ctl_table user_table[] = 
         UCOUNT_ENTRY("max_net_namespaces"),
         UCOUNT_ENTRY("max_mnt_namespaces"),
         UCOUNT_ENTRY("max_cgroup_namespaces"),
+ #ifdef CONFIG_INOTIFY_USER
+       UCOUNT_ENTRY("max_inotify_instances"),
+       UCOUNT_ENTRY("max_inotify_watches"),
+ #endif
         { }
   };
   #endif /* CONFIG_SYSCTL */
@@@ -227,10 -231,11 +231,10 @@@ static __init int user_namespace_sysctl
          * properly.
          */
         user_header = register_sysctl("user", empty);
+ +      kmemleak_ignore(user_header);
         BUG_ON(!user_header);
         BUG_ON(!setup_userns_sysctls(&init_user_ns));
   #endif
         return 0;
   }
   subsys_initcall(user_namespace_sysctl_init);
- -
- -
diff --combined security/apparmor/domain.c

index ef4beef,04185b7..001e133
--- 1/security/apparmor/domain.c
--- 2/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@@ -29,7 -29,6 +29,7 @@@
   #include "include/match.h"
   #include "include/path.h"
   #include "include/policy.h"
+ +#include "include/policy_ns.h"
   
   /**
    * aa_free_domain_entries - free entries in a domain table
@@@ -94,7 -93,7 +94,7 @@@ out
    * Returns: permission set
    */
   static struct file_perms change_profile_perms(struct aa_profile *profile,
- -                                            struct aa_namespace *ns,
+ +                                            struct aa_ns *ns,
                                               const char *name, u32 request,
                                               unsigned int start)
   {
@@@ -171,7 -170,7 +171,7 @@@ static struct aa_profile *__attach_matc
    *
    * Returns: profile or NULL if no match found
    */
- -static struct aa_profile *find_attach(struct aa_namespace *ns,
+ +static struct aa_profile *find_attach(struct aa_ns *ns,
                                       struct list_head *list, const char *name)
   {
         struct aa_profile *profile;
@@@ -240,7 -239,7 +240,7 @@@ static const char *next_name(int xtype
   static struct aa_profile *x_table_lookup(struct aa_profile *profile, u32 xindex)
   {
         struct aa_profile *new_profile = NULL;
- -      struct aa_namespace *ns = profile->ns;
+ +      struct aa_ns *ns = profile->ns;
         u32 xtype = xindex & AA_X_TYPE_MASK;
         int index = xindex & AA_X_INDEX_MASK;
         const char *name;
@@@ -248,7 -247,7 +248,7 @@@
         /* index is guaranteed to be in range, validated at load time */
         for (name = profile->file.trans.table[index]; !new_profile && name;
              name = next_name(xtype, name)) {
- -              struct aa_namespace *new_ns;
+ +              struct aa_ns *new_ns;
                 const char *xname = NULL;
   
                 new_ns = NULL;
@@@ -268,7 -267,7 +268,7 @@@
                                 ;
                         }
                         /* released below */
- -                      new_ns = aa_find_namespace(ns, ns_name);
+ +                      new_ns = aa_find_ns(ns, ns_name);
                         if (!new_ns)
                                 continue;
                 } else if (*name == '@') {
@@@ -281,7 -280,7 +281,7 @@@
   
                 /* released by caller */
                 new_profile = aa_lookup_profile(new_ns ? new_ns : ns, xname);
- -              aa_put_namespace(new_ns);
+ +              aa_put_ns(new_ns);
         }
   
         /* released by caller */
@@@ -302,7 -301,7 +302,7 @@@ static struct aa_profile *x_to_profile(
                                        const char *name, u32 xindex)
   {
         struct aa_profile *new_profile = NULL;
- -      struct aa_namespace *ns = profile->ns;
+ +      struct aa_ns *ns = profile->ns;
         u32 xtype = xindex & AA_X_TYPE_MASK;
   
         switch (xtype) {
@@@ -337,9 -336,9 +337,9 @@@
    */
   int apparmor_bprm_set_creds(struct linux_binprm *bprm)
   {
- -      struct aa_task_cxt *cxt;
+ +      struct aa_task_ctx *ctx;
         struct aa_profile *profile, *new_profile = NULL;
- -      struct aa_namespace *ns;
+ +      struct aa_ns *ns;
         char *buffer = NULL;
         unsigned int state;
         struct file_perms perms = {};
@@@ -353,10 -352,10 +353,10 @@@
         if (bprm->cred_prepared)
                 return 0;
   
- -      cxt = cred_cxt(bprm->cred);
- -      BUG_ON(!cxt);
+ +      ctx = cred_ctx(bprm->cred);
+ +      AA_BUG(!ctx);
   
- -      profile = aa_get_newest_profile(cxt->profile);
+ +      profile = aa_get_newest_profile(ctx->profile);
         /*
          * get the namespace from the replacement profile as replacement
          * can change the namespace
@@@ -380,9 -379,9 +380,9 @@@
          */
         if (unconfined(profile)) {
                 /* unconfined task */
- -              if (cxt->onexec)
+ +              if (ctx->onexec)
                         /* change_profile on exec already been granted */
- -                      new_profile = aa_get_profile(cxt->onexec);
+ +                      new_profile = aa_get_profile(ctx->onexec);
                 else
                         new_profile = find_attach(ns, &ns->base.profiles, name);
                 if (!new_profile)
@@@ -397,10 -396,10 +397,10 @@@
   
         /* find exec permissions for name */
         state = aa_str_perms(profile->file.dfa, state, name, &cond, &perms);
- -      if (cxt->onexec) {
+ +      if (ctx->onexec) {
                 struct file_perms cp;
                 info = "change_profile onexec";
- -              new_profile = aa_get_newest_profile(cxt->onexec);
+ +              new_profile = aa_get_newest_profile(ctx->onexec);
                 if (!(perms.allow & AA_MAY_ONEXEC))
                         goto audit;
   
@@@ -409,8 -408,8 +409,8 @@@
                  * exec\0change_profile
                  */
                 state = aa_dfa_null_transition(profile->file.dfa, state);
- -              cp = change_profile_perms(profile, cxt->onexec->ns,
- -                                        cxt->onexec->base.name,
+ +              cp = change_profile_perms(profile, ctx->onexec->ns,
+ +                                        ctx->onexec->base.name,
                                           AA_MAY_ONEXEC, state);
   
                 if (!(cp.allow & AA_MAY_ONEXEC))
@@@ -442,8 -441,7 +442,8 @@@
                 }
         } else if (COMPLAIN_MODE(profile)) {
                 /* no exec permission - are we in learning mode */
- -              new_profile = aa_new_null_profile(profile, 0);
+ +              new_profile = aa_new_null_profile(profile, false, name,
+ +                                                GFP_ATOMIC);
                 if (!new_profile) {
                         error = -ENOMEM;
                         info = "could not create null profile";
@@@ -471,7 -469,7 +471,7 @@@
                 ;
         }
   
-       if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
+       if (bprm->unsafe & LSM_UNSAFE_PTRACE) {
                 error = may_change_ptraced_domain(new_profile);
                 if (error)
                         goto audit;
@@@ -499,16 -497,17 +499,16 @@@ apply
         bprm->per_clear |= PER_CLEAR_ON_SETID;
   
   x_clear:
- -      aa_put_profile(cxt->profile);
- -      /* transfer new profile reference will be released when cxt is freed */
- -      cxt->profile = new_profile;
+ +      aa_put_profile(ctx->profile);
+ +      /* transfer new profile reference will be released when ctx is freed */
+ +      ctx->profile = new_profile;
         new_profile = NULL;
   
         /* clear out all temporary/transitional state from the context */
- -      aa_clear_task_cxt_trans(cxt);
+ +      aa_clear_task_ctx_trans(ctx);
   
   audit:
- -      error = aa_audit_file(profile, &perms, GFP_KERNEL, OP_EXEC, MAY_EXEC,
- -                            name,
+ +      error = aa_audit_file(profile, &perms, OP_EXEC, MAY_EXEC, name,
                               new_profile ? new_profile->base.hname : NULL,
                               cond.uid, info, error);
   
@@@ -544,17 -543,17 +544,17 @@@ int apparmor_bprm_secureexec(struct lin
   void apparmor_bprm_committing_creds(struct linux_binprm *bprm)
   {
         struct aa_profile *profile = __aa_current_profile();
- -      struct aa_task_cxt *new_cxt = cred_cxt(bprm->cred);
+ +      struct aa_task_ctx *new_ctx = cred_ctx(bprm->cred);
   
         /* bail out if unconfined or not changing profile */
- -      if ((new_cxt->profile == profile) ||
- -          (unconfined(new_cxt->profile)))
+ +      if ((new_ctx->profile == profile) ||
+ +          (unconfined(new_ctx->profile)))
                 return;
   
         current->pdeath_signal = 0;
   
         /* reset soft limits and set hard limits for the new profile */
- -      __aa_transition_rlimits(profile, new_cxt->profile);
+ +      __aa_transition_rlimits(profile, new_ctx->profile);
   }
   
   /**
@@@ -603,7 -602,7 +603,7 @@@ static char *new_compound_name(const ch
   int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
   {
         const struct cred *cred;
- -      struct aa_task_cxt *cxt;
+ +      struct aa_task_ctx *ctx;
         struct aa_profile *profile, *previous_profile, *hat = NULL;
         char *name = NULL;
         int i;
@@@ -621,9 -620,9 +621,9 @@@
   
         /* released below */
         cred = get_current_cred();
- -      cxt = cred_cxt(cred);
+ +      ctx = cred_ctx(cred);
         profile = aa_get_newest_profile(aa_cred_profile(cred));
- -      previous_profile = aa_get_newest_profile(cxt->previous);
+ +      previous_profile = aa_get_newest_profile(ctx->previous);
   
         if (unconfined(profile)) {
                 info = "unconfined";
@@@ -667,8 -666,7 +667,8 @@@
                         aa_put_profile(root);
                         target = name;
                         /* released below */
- -                      hat = aa_new_null_profile(profile, 1);
+ +                      hat = aa_new_null_profile(profile, true, hats[0],
+ +                                                GFP_KERNEL);
                         if (!hat) {
                                 info = "failed null profile create";
                                 error = -ENOMEM;
@@@ -713,9 -711,9 +713,9 @@@
   
   audit:
         if (!permtest)
- -              error = aa_audit_file(profile, &perms, GFP_KERNEL,
- -                                    OP_CHANGE_HAT, AA_MAY_CHANGEHAT, NULL,
- -                                    target, GLOBAL_ROOT_UID, info, error);
+ +              error = aa_audit_file(profile, &perms, OP_CHANGE_HAT,
+ +                                    AA_MAY_CHANGEHAT, NULL, target,
+ +                                    GLOBAL_ROOT_UID, info, error);
   
   out:
         aa_put_profile(hat);
@@@ -729,7 -727,8 +729,7 @@@
   
   /**
    * aa_change_profile - perform a one-way profile transition
- - * @ns_name: name of the profile namespace to change to (MAYBE NULL)
- - * @hname: name of profile to change to (MAYBE NULL)
+ + * @fqname: name of profile may include namespace (NOT NULL)
    * @onexec: whether this transition is to take place immediately or at exec
    * @permtest: true if this is just a permission test
    *
@@@ -741,20 -740,19 +741,20 @@@
    *
    * Returns %0 on success, error otherwise.
    */
- -int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
- -                    bool permtest)
+ +int aa_change_profile(const char *fqname, bool onexec,
+ +                    bool permtest, bool stack)
   {
         const struct cred *cred;
         struct aa_profile *profile, *target = NULL;
- -      struct aa_namespace *ns = NULL;
         struct file_perms perms = {};
- -      const char *name = NULL, *info = NULL;
- -      int op, error = 0;
+ +      const char *info = NULL, *op;
+ +      int error = 0;
         u32 request;
   
- -      if (!hname && !ns_name)
+ +      if (!fqname || !*fqname) {
+ +              AA_DEBUG("no profile name");
                 return -EINVAL;
+ +      }
   
         if (onexec) {
                 request = AA_MAY_ONEXEC;
@@@ -779,15 -777,44 +779,15 @@@
                 return -EPERM;
         }
   
- -      if (ns_name) {
- -              /* released below */
- -              ns = aa_find_namespace(profile->ns, ns_name);
- -              if (!ns) {
- -                      /* we don't create new namespace in complain mode */
- -                      name = ns_name;
- -                      info = "namespace not found";
- -                      error = -ENOENT;
- -                      goto audit;
- -              }
- -      } else
- -              /* released below */
- -              ns = aa_get_namespace(profile->ns);
- -
- -      /* if the name was not specified, use the name of the current profile */
- -      if (!hname) {
- -              if (unconfined(profile))
- -                      hname = ns->unconfined->base.hname;
- -              else
- -                      hname = profile->base.hname;
- -      }
- -
- -      perms = change_profile_perms(profile, ns, hname, request,
- -                                   profile->file.start);
- -      if (!(perms.allow & request)) {
- -              error = -EACCES;
- -              goto audit;
- -      }
- -
- -      /* released below */
- -      target = aa_lookup_profile(ns, hname);
+ +      target = aa_fqlookupn_profile(profile, fqname, strlen(fqname));
         if (!target) {
                 info = "profile not found";
                 error = -ENOENT;
                 if (permtest || !COMPLAIN_MODE(profile))
                         goto audit;
                 /* released below */
- -              target = aa_new_null_profile(profile, 0);
+ +              target = aa_new_null_profile(profile, false, fqname,
+ +                                           GFP_KERNEL);
                 if (!target) {
                         info = "failed null profile create";
                         error = -ENOMEM;
@@@ -795,13 -822,6 +795,13 @@@
                 }
         }
   
+ +      perms = change_profile_perms(profile, target->ns, target->base.hname,
+ +                                   request, profile->file.start);
+ +      if (!(perms.allow & request)) {
+ +              error = -EACCES;
+ +              goto audit;
+ +      }
+ +
         /* check if tracing task is allowed to trace target domain */
         error = may_change_ptraced_domain(target);
         if (error) {
@@@ -819,9 -839,10 +819,9 @@@
   
   audit:
         if (!permtest)
- -              error = aa_audit_file(profile, &perms, GFP_KERNEL, op, request,
- -                                    name, hname, GLOBAL_ROOT_UID, info, error);
+ +              error = aa_audit_file(profile, &perms, op, request, NULL,
+ +                                    fqname, GLOBAL_ROOT_UID, info, error);
   
- -      aa_put_namespace(ns);
         aa_put_profile(target);
         put_cred(cred);
   
diff --combined security/commoncap.c

index 6d4d586,8ec6b7f..78b3783
--- 1/security/commoncap.c
--- 2/security/commoncap.c
+++ b/security/commoncap.c
@@@ -548,9 -548,10 +548,10 @@@ skip
   
         if ((is_setid ||
              !cap_issubset(new->cap_permitted, old->cap_permitted)) &&
-           bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
+           ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
+            !ptracer_capable(current, new->user_ns))) {
                 /* downgrade; they get no more than they had, and maybe less */
-               if (!capable(CAP_SETUID) ||
+               if (!ns_capable(new->user_ns, CAP_SETUID) ||
                     (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
                         new->euid = new->uid;
                         new->egid = new->gid;
@@@ -1093,8 -1094,7 +1094,8 @@@ struct security_hook_list capability_ho
   
   void __init capability_add_hooks(void)
   {
- -      security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks));
+ +      security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
+ +                              "capability");
   }
   
   #endif /* CONFIG_SECURITY */
diff --combined security/selinux/hooks.c

index e6b1b74,cece6fe..9a8f12f
--- 1/security/selinux/hooks.c
--- 2/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@@ -210,6 -210,16 +210,6 @@@ static inline u32 task_sid(const struc
         return sid;
   }
   
- -/*
- - * get the subjective security ID of the current task
- - */
- -static inline u32 current_sid(void)
- -{
- -      const struct task_security_struct *tsec = current_security();
- -
- -      return tsec->sid;
- -}
- -
   /* Allocate and free functions for each kind of security blob. */
   
   static int inode_alloc_security(struct inode *inode)
@@@ -480,11 -490,8 +480,11 @@@ static int selinux_is_sblabel_mnt(struc
                 sbsec->behavior == SECURITY_FS_USE_NATIVE ||
                 /* Special handling. Genfs but also in-core setxattr handler */
                 !strcmp(sb->s_type->name, "sysfs") ||
+ +              !strcmp(sb->s_type->name, "cgroup") ||
+ +              !strcmp(sb->s_type->name, "cgroup2") ||
                 !strcmp(sb->s_type->name, "pstore") ||
                 !strcmp(sb->s_type->name, "debugfs") ||
+ +              !strcmp(sb->s_type->name, "tracefs") ||
                 !strcmp(sb->s_type->name, "rootfs");
   }
   
@@@ -826,14 -833,10 +826,14 @@@ static int selinux_set_mnt_opts(struct 
         }
   
         /*
- -       * If this is a user namespace mount, no contexts are allowed
- -       * on the command line and security labels must be ignored.
+ +       * If this is a user namespace mount and the filesystem type is not
+ +       * explicitly whitelisted, then no contexts are allowed on the command
+ +       * line and security labels must be ignored.
          */
- -      if (sb->s_user_ns != &init_user_ns) {
+ +      if (sb->s_user_ns != &init_user_ns &&
+ +          strcmp(sb->s_type->name, "tmpfs") &&
+ +          strcmp(sb->s_type->name, "ramfs") &&
+ +          strcmp(sb->s_type->name, "devpts")) {
                 if (context_sid || fscontext_sid || rootcontext_sid ||
                     defcontext_sid) {
                         rc = -EACCES;
@@@ -1265,8 -1268,6 +1265,8 @@@ static inline int default_protocol_dgra
   
   static inline u16 socket_type_to_security_class(int family, int type, int protocol)
   {
+ +      int extsockclass = selinux_policycap_extsockclass;
+ +
         switch (family) {
         case PF_UNIX:
                 switch (type) {
@@@ -1281,19 -1282,13 +1281,19 @@@
         case PF_INET6:
                 switch (type) {
                 case SOCK_STREAM:
+ +              case SOCK_SEQPACKET:
                         if (default_protocol_stream(protocol))
                                 return SECCLASS_TCP_SOCKET;
+ +                      else if (extsockclass && protocol == IPPROTO_SCTP)
+ +                              return SECCLASS_SCTP_SOCKET;
                         else
                                 return SECCLASS_RAWIP_SOCKET;
                 case SOCK_DGRAM:
                         if (default_protocol_dgram(protocol))
                                 return SECCLASS_UDP_SOCKET;
+ +                      else if (extsockclass && (protocol == IPPROTO_ICMP ||
+ +                                                protocol == IPPROTO_ICMPV6))
+ +                              return SECCLASS_ICMP_SOCKET;
                         else
                                 return SECCLASS_RAWIP_SOCKET;
                 case SOCK_DCCP:
@@@ -1347,68 -1342,6 +1347,68 @@@
                 return SECCLASS_APPLETALK_SOCKET;
         }
   
+ +      if (extsockclass) {
+ +              switch (family) {
+ +              case PF_AX25:
+ +                      return SECCLASS_AX25_SOCKET;
+ +              case PF_IPX:
+ +                      return SECCLASS_IPX_SOCKET;
+ +              case PF_NETROM:
+ +                      return SECCLASS_NETROM_SOCKET;
+ +              case PF_ATMPVC:
+ +                      return SECCLASS_ATMPVC_SOCKET;
+ +              case PF_X25:
+ +                      return SECCLASS_X25_SOCKET;
+ +              case PF_ROSE:
+ +                      return SECCLASS_ROSE_SOCKET;
+ +              case PF_DECnet:
+ +                      return SECCLASS_DECNET_SOCKET;
+ +              case PF_ATMSVC:
+ +                      return SECCLASS_ATMSVC_SOCKET;
+ +              case PF_RDS:
+ +                      return SECCLASS_RDS_SOCKET;
+ +              case PF_IRDA:
+ +                      return SECCLASS_IRDA_SOCKET;
+ +              case PF_PPPOX:
+ +                      return SECCLASS_PPPOX_SOCKET;
+ +              case PF_LLC:
+ +                      return SECCLASS_LLC_SOCKET;
+ +              case PF_CAN:
+ +                      return SECCLASS_CAN_SOCKET;
+ +              case PF_TIPC:
+ +                      return SECCLASS_TIPC_SOCKET;
+ +              case PF_BLUETOOTH:
+ +                      return SECCLASS_BLUETOOTH_SOCKET;
+ +              case PF_IUCV:
+ +                      return SECCLASS_IUCV_SOCKET;
+ +              case PF_RXRPC:
+ +                      return SECCLASS_RXRPC_SOCKET;
+ +              case PF_ISDN:
+ +                      return SECCLASS_ISDN_SOCKET;
+ +              case PF_PHONET:
+ +                      return SECCLASS_PHONET_SOCKET;
+ +              case PF_IEEE802154:
+ +                      return SECCLASS_IEEE802154_SOCKET;
+ +              case PF_CAIF:
+ +                      return SECCLASS_CAIF_SOCKET;
+ +              case PF_ALG:
+ +                      return SECCLASS_ALG_SOCKET;
+ +              case PF_NFC:
+ +                      return SECCLASS_NFC_SOCKET;
+ +              case PF_VSOCK:
+ +                      return SECCLASS_VSOCK_SOCKET;
+ +              case PF_KCM:
+ +                      return SECCLASS_KCM_SOCKET;
+ +              case PF_QIPCRTR:
+ +                      return SECCLASS_QIPCRTR_SOCKET;
+ +              case PF_SMC:
+ +                      return SECCLASS_SMC_SOCKET;
+ +#if PF_MAX > 44
+ +#error New address family defined, please update this function.
+ +#endif
+ +              }
+ +      }
+ +
         return SECCLASS_SOCKET;
   }
   
@@@ -1675,6 -1608,55 +1675,6 @@@ static inline u32 signal_to_av(int sig
         return perm;
   }
   
- -/*
- - * Check permission between a pair of credentials
- - * fork check, ptrace check, etc.
- - */
- -static int cred_has_perm(const struct cred *actor,
- -                       const struct cred *target,
- -                       u32 perms)
- -{
- -      u32 asid = cred_sid(actor), tsid = cred_sid(target);
- -
- -      return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL);
- -}
- -
- -/*
- - * Check permission between a pair of tasks, e.g. signal checks,
- - * fork check, ptrace check, etc.
- - * tsk1 is the actor and tsk2 is the target
- - * - this uses the default subjective creds of tsk1
- - */
- -static int task_has_perm(const struct task_struct *tsk1,
- -                       const struct task_struct *tsk2,
- -                       u32 perms)
- -{
- -      const struct task_security_struct *__tsec1, *__tsec2;
- -      u32 sid1, sid2;
- -
- -      rcu_read_lock();
- -      __tsec1 = __task_cred(tsk1)->security;  sid1 = __tsec1->sid;
- -      __tsec2 = __task_cred(tsk2)->security;  sid2 = __tsec2->sid;
- -      rcu_read_unlock();
- -      return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL);
- -}
- -
- -/*
- - * Check permission between current and another task, e.g. signal checks,
- - * fork check, ptrace check, etc.
- - * current is the actor and tsk2 is the target
- - * - this uses current's subjective creds
- - */
- -static int current_has_perm(const struct task_struct *tsk,
- -                          u32 perms)
- -{
- -      u32 sid, tsid;
- -
- -      sid = current_sid();
- -      tsid = task_sid(tsk);
- -      return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL);
- -}
- -
   #if CAP_LAST_CAP > 63
   #error Fix SELinux to handle capabilities > 63.
   #endif
@@@ -1716,6 -1698,16 +1716,6 @@@ static int cred_has_capability(const st
         return rc;
   }
   
- -/* Check whether a task is allowed to use a system operation. */
- -static int task_has_system(struct task_struct *tsk,
- -                         u32 perms)
- -{
- -      u32 sid = task_sid(tsk);
- -
- -      return avc_has_perm(sid, SECINITSID_KERNEL,
- -                          SECCLASS_SYSTEM, perms, NULL);
- -}
- -
   /* Check whether a task has a particular permission to an inode.
      The 'adp' parameter is optional and allows other audit
      data to be passed (e.g. the dentry). */
@@@ -1887,6 -1879,15 +1887,6 @@@ static int may_create(struct inode *dir
                             FILESYSTEM__ASSOCIATE, &ad);
   }
   
- -/* Check whether a task can create a key. */
- -static int may_create_key(u32 ksid,
- -                        struct task_struct *ctx)
- -{
- -      u32 sid = task_sid(ctx);
- -
- -      return avc_has_perm(sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL);
- -}
- -
   #define MAY_LINK      0
   #define MAY_UNLINK    1
   #define MAY_RMDIR     2
@@@ -2142,26 -2143,24 +2142,26 @@@ static int selinux_binder_transfer_file
   static int selinux_ptrace_access_check(struct task_struct *child,
                                      unsigned int mode)
   {
- -      if (mode & PTRACE_MODE_READ) {
- -              u32 sid = current_sid();
- -              u32 csid = task_sid(child);
+ +      u32 sid = current_sid();
+ +      u32 csid = task_sid(child);
+ +
+ +      if (mode & PTRACE_MODE_READ)
                 return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL);
- -      }
   
- -      return current_has_perm(child, PROCESS__PTRACE);
+ +      return avc_has_perm(sid, csid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL);
   }
   
   static int selinux_ptrace_traceme(struct task_struct *parent)
   {
- -      return task_has_perm(parent, current, PROCESS__PTRACE);
+ +      return avc_has_perm(task_sid(parent), current_sid(), SECCLASS_PROCESS,
+ +                          PROCESS__PTRACE, NULL);
   }
   
   static int selinux_capget(struct task_struct *target, kernel_cap_t *effective,
                           kernel_cap_t *inheritable, kernel_cap_t *permitted)
   {
- -      return current_has_perm(target, PROCESS__GETCAP);
+ +      return avc_has_perm(current_sid(), task_sid(target), SECCLASS_PROCESS,
+ +                          PROCESS__GETCAP, NULL);
   }
   
   static int selinux_capset(struct cred *new, const struct cred *old,
@@@ -2169,8 -2168,7 +2169,8 @@@
                           const kernel_cap_t *inheritable,
                           const kernel_cap_t *permitted)
   {
- -      return cred_has_perm(old, new, PROCESS__SETCAP);
+ +      return avc_has_perm(cred_sid(old), cred_sid(new), SECCLASS_PROCESS,
+ +                          PROCESS__SETCAP, NULL);
   }
   
   /*
@@@ -2226,22 -2224,29 +2226,22 @@@ static int selinux_quota_on(struct dent
   
   static int selinux_syslog(int type)
   {
- -      int rc;
- -
         switch (type) {
         case SYSLOG_ACTION_READ_ALL:    /* Read last kernel messages */
         case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */
- -              rc = task_has_system(current, SYSTEM__SYSLOG_READ);
- -              break;
+ +              return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ +                                  SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, NULL);
         case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */
         case SYSLOG_ACTION_CONSOLE_ON:  /* Enable logging to console */
         /* Set level of messages printed to console */
         case SYSLOG_ACTION_CONSOLE_LEVEL:
- -              rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE);
- -              break;
- -      case SYSLOG_ACTION_CLOSE:       /* Close log */
- -      case SYSLOG_ACTION_OPEN:        /* Open log */
- -      case SYSLOG_ACTION_READ:        /* Read from log */
- -      case SYSLOG_ACTION_READ_CLEAR:  /* Read/clear last kernel messages */
- -      case SYSLOG_ACTION_CLEAR:       /* Clear ring buffer */
- -      default:
- -              rc = task_has_system(current, SYSTEM__SYSLOG_MOD);
- -              break;
+ +              return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ +                                  SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE,
+ +                                  NULL);
         }
- -      return rc;
+ +      /* All other syslog types */
+ +      return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ +                          SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, NULL);
   }
   
   /*
@@@ -2266,13 -2271,13 +2266,13 @@@ static int selinux_vm_enough_memory(str
   
   /* binprm security operations */
   
- -static u32 ptrace_parent_sid(struct task_struct *task)
+ +static u32 ptrace_parent_sid(void)
   {
         u32 sid = 0;
         struct task_struct *tracer;
   
         rcu_read_lock();
- -      tracer = ptrace_parent(task);
+ +      tracer = ptrace_parent(current);
         if (tracer)
                 sid = task_sid(tracer);
         rcu_read_unlock();
@@@ -2399,9 -2404,8 +2399,8 @@@ static int selinux_bprm_set_creds(struc
   
                 /* Make sure that anyone attempting to ptrace over a task that
                  * changes its SID has the appropriate permit */
-               if (bprm->unsafe &
-                   (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
+               if (bprm->unsafe & LSM_UNSAFE_PTRACE) {
- -                      u32 ptsid = ptrace_parent_sid(current);
+ +                      u32 ptsid = ptrace_parent_sid();
                         if (ptsid != 0) {
                                 rc = avc_has_perm(ptsid, new_tsec->sid,
                                                   SECCLASS_PROCESS,
@@@ -3498,7 -3502,6 +3497,7 @@@ static int default_noexec
   static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
   {
         const struct cred *cred = current_cred();
+ +      u32 sid = cred_sid(cred);
         int rc = 0;
   
         if (default_noexec &&
@@@ -3509,8 -3512,7 +3508,8 @@@
                  * private file mapping that will also be writable.
                  * This has an additional check.
                  */
- -              rc = cred_has_perm(cred, cred, PROCESS__EXECMEM);
+ +              rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+ +                                PROCESS__EXECMEM, NULL);
                 if (rc)
                         goto error;
         }
@@@ -3561,7 -3563,6 +3560,7 @@@ static int selinux_file_mprotect(struc
                                  unsigned long prot)
   {
         const struct cred *cred = current_cred();
+ +      u32 sid = cred_sid(cred);
   
         if (selinux_checkreqprot)
                 prot = reqprot;
@@@ -3571,14 -3572,12 +3570,14 @@@
                 int rc = 0;
                 if (vma->vm_start >= vma->vm_mm->start_brk &&
                     vma->vm_end <= vma->vm_mm->brk) {
- -                      rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP);
+ +                      rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+ +                                        PROCESS__EXECHEAP, NULL);
                 } else if (!vma->vm_file &&
                            ((vma->vm_start <= vma->vm_mm->start_stack &&
                              vma->vm_end >= vma->vm_mm->start_stack) ||
                             vma_is_stack_for_current(vma))) {
- -                      rc = current_has_perm(current, PROCESS__EXECSTACK);
+ +                      rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+ +                                        PROCESS__EXECSTACK, NULL);
                 } else if (vma->vm_file && vma->anon_vma) {
                         /*
                          * We are making executable a file mapping that has
@@@ -3711,9 -3710,7 +3710,9 @@@ static int selinux_file_open(struct fil
   
   static int selinux_task_create(unsigned long clone_flags)
   {
- -      return current_has_perm(current, PROCESS__FORK);
+ +      u32 sid = current_sid();
+ +
+ +      return avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL);
   }
   
   /*
@@@ -3823,12 -3820,15 +3822,12 @@@ static int selinux_kernel_create_files_
   
   static int selinux_kernel_module_request(char *kmod_name)
   {
- -      u32 sid;
         struct common_audit_data ad;
   
- -      sid = task_sid(current);
- -
         ad.type = LSM_AUDIT_DATA_KMOD;
         ad.u.kmod_name = kmod_name;
   
- -      return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM,
+ +      return avc_has_perm(current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM,
                             SYSTEM__MODULE_REQUEST, &ad);
   }
   
@@@ -3880,20 -3880,17 +3879,20 @@@ static int selinux_kernel_read_file(str
   
   static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
   {
- -      return current_has_perm(p, PROCESS__SETPGID);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__SETPGID, NULL);
   }
   
   static int selinux_task_getpgid(struct task_struct *p)
   {
- -      return current_has_perm(p, PROCESS__GETPGID);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__GETPGID, NULL);
   }
   
   static int selinux_task_getsid(struct task_struct *p)
   {
- -      return current_has_perm(p, PROCESS__GETSESSION);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__GETSESSION, NULL);
   }
   
   static void selinux_task_getsecid(struct task_struct *p, u32 *secid)
@@@ -3903,20 -3900,17 +3902,20 @@@
   
   static int selinux_task_setnice(struct task_struct *p, int nice)
   {
- -      return current_has_perm(p, PROCESS__SETSCHED);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__SETSCHED, NULL);
   }
   
   static int selinux_task_setioprio(struct task_struct *p, int ioprio)
   {
- -      return current_has_perm(p, PROCESS__SETSCHED);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__SETSCHED, NULL);
   }
   
   static int selinux_task_getioprio(struct task_struct *p)
   {
- -      return current_has_perm(p, PROCESS__GETSCHED);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__GETSCHED, NULL);
   }
   
   static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource,
@@@ -3929,42 -3923,47 +3928,42 @@@
            later be used as a safe reset point for the soft limit
            upon context transitions.  See selinux_bprm_committing_creds. */
         if (old_rlim->rlim_max != new_rlim->rlim_max)
- -              return current_has_perm(p, PROCESS__SETRLIMIT);
+ +              return avc_has_perm(current_sid(), task_sid(p),
+ +                                  SECCLASS_PROCESS, PROCESS__SETRLIMIT, NULL);
   
         return 0;
   }
   
   static int selinux_task_setscheduler(struct task_struct *p)
   {
- -      return current_has_perm(p, PROCESS__SETSCHED);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__SETSCHED, NULL);
   }
   
   static int selinux_task_getscheduler(struct task_struct *p)
   {
- -      return current_has_perm(p, PROCESS__GETSCHED);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__GETSCHED, NULL);
   }
   
   static int selinux_task_movememory(struct task_struct *p)
   {
- -      return current_has_perm(p, PROCESS__SETSCHED);
+ +      return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ +                          PROCESS__SETSCHED, NULL);
   }
   
   static int selinux_task_kill(struct task_struct *p, struct siginfo *info,
                                 int sig, u32 secid)
   {
         u32 perm;
- -      int rc;
   
         if (!sig)
                 perm = PROCESS__SIGNULL; /* null signal; existence test */
         else
                 perm = signal_to_av(sig);
- -      if (secid)
- -              rc = avc_has_perm(secid, task_sid(p),
- -                                SECCLASS_PROCESS, perm, NULL);
- -      else
- -              rc = current_has_perm(p, perm);
- -      return rc;
- -}
- -
- -static int selinux_task_wait(struct task_struct *p)
- -{
- -      return task_has_perm(p, current, PROCESS__SIGCHLD);
+ +      if (!secid)
+ +              secid = current_sid();
+ +      return avc_has_perm(secid, task_sid(p), SECCLASS_PROCESS, perm, NULL);
   }
   
   static void selinux_task_to_inode(struct task_struct *p,
@@@ -4254,11 -4253,12 +4253,11 @@@ static int socket_sockcreate_sid(const 
                                        socksid);
   }
   
- -static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms)
+ +static int sock_has_perm(struct sock *sk, u32 perms)
   {
         struct sk_security_struct *sksec = sk->sk_security;
         struct common_audit_data ad;
         struct lsm_network_audit net = {0,};
- -      u32 tsid = task_sid(task);
   
         if (sksec->sid == SECINITSID_KERNEL)
                 return 0;
@@@ -4267,8 -4267,7 +4266,8 @@@
         ad.u.net = &net;
         ad.u.net->sk = sk;
   
- -      return avc_has_perm(tsid, sksec->sid, sksec->sclass, perms, &ad);
+ +      return avc_has_perm(current_sid(), sksec->sid, sksec->sclass, perms,
+ +                          &ad);
   }
   
   static int selinux_socket_create(int family, int type,
@@@ -4330,7 -4329,7 +4329,7 @@@ static int selinux_socket_bind(struct s
         u16 family;
         int err;
   
- -      err = sock_has_perm(current, sk, SOCKET__BIND);
+ +      err = sock_has_perm(sk, SOCKET__BIND);
         if (err)
                 goto out;
   
@@@ -4365,8 -4364,7 +4364,8 @@@
   
                         inet_get_local_port_range(sock_net(sk), &low, &high);
   
- -                      if (snum < max(PROT_SOCK, low) || snum > high) {
+ +                      if (snum < max(inet_prot_sock(sock_net(sk)), low) ||
+ +                          snum > high) {
                                 err = sel_netport_sid(sk->sk_protocol,
                                                       snum, &sid);
                                 if (err)
@@@ -4430,7 -4428,7 +4429,7 @@@ static int selinux_socket_connect(struc
         struct sk_security_struct *sksec = sk->sk_security;
         int err;
   
- -      err = sock_has_perm(current, sk, SOCKET__CONNECT);
+ +      err = sock_has_perm(sk, SOCKET__CONNECT);
         if (err)
                 return err;
   
@@@ -4482,7 -4480,7 +4481,7 @@@ out
   
   static int selinux_socket_listen(struct socket *sock, int backlog)
   {
- -      return sock_has_perm(current, sock->sk, SOCKET__LISTEN);
+ +      return sock_has_perm(sock->sk, SOCKET__LISTEN);
   }
   
   static int selinux_socket_accept(struct socket *sock, struct socket *newsock)
@@@ -4493,7 -4491,7 +4492,7 @@@
         u16 sclass;
         u32 sid;
   
- -      err = sock_has_perm(current, sock->sk, SOCKET__ACCEPT);
+ +      err = sock_has_perm(sock->sk, SOCKET__ACCEPT);
         if (err)
                 return err;
   
@@@ -4514,30 -4512,30 +4513,30 @@@
   static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg,
                                   int size)
   {
- -      return sock_has_perm(current, sock->sk, SOCKET__WRITE);
+ +      return sock_has_perm(sock->sk, SOCKET__WRITE);
   }
   
   static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg,
                                   int size, int flags)
   {
- -      return sock_has_perm(current, sock->sk, SOCKET__READ);
+ +      return sock_has_perm(sock->sk, SOCKET__READ);
   }
   
   static int selinux_socket_getsockname(struct socket *sock)
   {
- -      return sock_has_perm(current, sock->sk, SOCKET__GETATTR);
+ +      return sock_has_perm(sock->sk, SOCKET__GETATTR);
   }
   
   static int selinux_socket_getpeername(struct socket *sock)
   {
- -      return sock_has_perm(current, sock->sk, SOCKET__GETATTR);
+ +      return sock_has_perm(sock->sk, SOCKET__GETATTR);
   }
   
   static int selinux_socket_setsockopt(struct socket *sock, int level, int optname)
   {
         int err;
   
- -      err = sock_has_perm(current, sock->sk, SOCKET__SETOPT);
+ +      err = sock_has_perm(sock->sk, SOCKET__SETOPT);
         if (err)
                 return err;
   
@@@ -4547,12 -4545,12 +4546,12 @@@
   static int selinux_socket_getsockopt(struct socket *sock, int level,
                                      int optname)
   {
- -      return sock_has_perm(current, sock->sk, SOCKET__GETOPT);
+ +      return sock_has_perm(sock->sk, SOCKET__GETOPT);
   }
   
   static int selinux_socket_shutdown(struct socket *sock, int how)
   {
- -      return sock_has_perm(current, sock->sk, SOCKET__SHUTDOWN);
+ +      return sock_has_perm(sock->sk, SOCKET__SHUTDOWN);
   }
   
   static int selinux_socket_unix_stream_connect(struct sock *sock,
@@@ -5040,7 -5038,7 +5039,7 @@@ static int selinux_nlmsg_perm(struct so
                 goto out;
         }
   
- -      err = sock_has_perm(current, sk, perm);
+ +      err = sock_has_perm(sk, perm);
   out:
         return err;
   }
@@@ -5371,17 -5369,20 +5370,17 @@@ static int selinux_netlink_send(struct 
         return selinux_nlmsg_perm(sk, skb);
   }
   
- -static int ipc_alloc_security(struct task_struct *task,
- -                            struct kern_ipc_perm *perm,
+ +static int ipc_alloc_security(struct kern_ipc_perm *perm,
                               u16 sclass)
   {
         struct ipc_security_struct *isec;
- -      u32 sid;
   
         isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL);
         if (!isec)
                 return -ENOMEM;
   
- -      sid = task_sid(task);
         isec->sclass = sclass;
- -      isec->sid = sid;
+ +      isec->sid = current_sid();
         perm->security = isec;
   
         return 0;
@@@ -5449,7 -5450,7 +5448,7 @@@ static int selinux_msg_queue_alloc_secu
         u32 sid = current_sid();
         int rc;
   
- -      rc = ipc_alloc_security(current, &msq->q_perm, SECCLASS_MSGQ);
+ +      rc = ipc_alloc_security(&msq->q_perm, SECCLASS_MSGQ);
         if (rc)
                 return rc;
   
@@@ -5496,8 -5497,7 +5495,8 @@@ static int selinux_msg_queue_msgctl(str
         case IPC_INFO:
         case MSG_INFO:
                 /* No specific object, just general system-wide information. */
- -              return task_has_system(current, SYSTEM__IPC_INFO);
+ +              return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ +                                  SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
         case IPC_STAT:
         case MSG_STAT:
                 perms = MSGQ__GETATTR | MSGQ__ASSOCIATE;
@@@ -5591,7 -5591,7 +5590,7 @@@ static int selinux_shm_alloc_security(s
         u32 sid = current_sid();
         int rc;
   
- -      rc = ipc_alloc_security(current, &shp->shm_perm, SECCLASS_SHM);
+ +      rc = ipc_alloc_security(&shp->shm_perm, SECCLASS_SHM);
         if (rc)
                 return rc;
   
@@@ -5639,8 -5639,7 +5638,8 @@@ static int selinux_shm_shmctl(struct sh
         case IPC_INFO:
         case SHM_INFO:
                 /* No specific object, just general system-wide information. */
- -              return task_has_system(current, SYSTEM__IPC_INFO);
+ +              return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ +                                  SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
         case IPC_STAT:
         case SHM_STAT:
                 perms = SHM__GETATTR | SHM__ASSOCIATE;
@@@ -5684,7 -5683,7 +5683,7 @@@ static int selinux_sem_alloc_security(s
         u32 sid = current_sid();
         int rc;
   
- -      rc = ipc_alloc_security(current, &sma->sem_perm, SECCLASS_SEM);
+ +      rc = ipc_alloc_security(&sma->sem_perm, SECCLASS_SEM);
         if (rc)
                 return rc;
   
@@@ -5732,8 -5731,7 +5731,8 @@@ static int selinux_sem_semctl(struct se
         case IPC_INFO:
         case SEM_INFO:
                 /* No specific object, just general system-wide information. */
- -              return task_has_system(current, SYSTEM__IPC_INFO);
+ +              return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ +                                  SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
         case GETPID:
         case GETNCNT:
         case GETZCNT:
@@@ -5814,16 -5812,15 +5813,16 @@@ static int selinux_getprocattr(struct t
         int error;
         unsigned len;
   
+ +      rcu_read_lock();
+ +      __tsec = __task_cred(p)->security;
+ +
         if (current != p) {
- -              error = current_has_perm(p, PROCESS__GETATTR);
+ +              error = avc_has_perm(current_sid(), __tsec->sid,
+ +                                   SECCLASS_PROCESS, PROCESS__GETATTR, NULL);
                 if (error)
- -                      return error;
+ +                      goto bad;
         }
   
- -      rcu_read_lock();
- -      __tsec = __task_cred(p)->security;
- -
         if (!strcmp(name, "current"))
                 sid = __tsec->sid;
         else if (!strcmp(name, "prev"))
@@@ -5836,10 -5833,8 +5835,10 @@@
                 sid = __tsec->keycreate_sid;
         else if (!strcmp(name, "sockcreate"))
                 sid = __tsec->sockcreate_sid;
- -      else
- -              goto invalid;
+ +      else {
+ +              error = -EINVAL;
+ +              goto bad;
+ +      }
         rcu_read_unlock();
   
         if (!sid)
@@@ -5850,44 -5845,48 +5849,44 @@@
                 return error;
         return len;
   
- -invalid:
+ +bad:
         rcu_read_unlock();
- -      return -EINVAL;
+ +      return error;
   }
   
- -static int selinux_setprocattr(struct task_struct *p,
- -                             char *name, void *value, size_t size)
+ +static int selinux_setprocattr(const char *name, void *value, size_t size)
   {
         struct task_security_struct *tsec;
         struct cred *new;
- -      u32 sid = 0, ptsid;
+ +      u32 mysid = current_sid(), sid = 0, ptsid;
         int error;
         char *str = value;
   
- -      if (current != p) {
- -              /* SELinux only allows a process to change its own
- -                 security attributes. */
- -              return -EACCES;
- -      }
- -
         /*
          * Basic control over ability to set these attributes at all.
- -       * current == p, but we'll pass them separately in case the
- -       * above restriction is ever removed.
          */
         if (!strcmp(name, "exec"))
- -              error = current_has_perm(p, PROCESS__SETEXEC);
+ +              error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ +                                   PROCESS__SETEXEC, NULL);
         else if (!strcmp(name, "fscreate"))
- -              error = current_has_perm(p, PROCESS__SETFSCREATE);
+ +              error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ +                                   PROCESS__SETFSCREATE, NULL);
         else if (!strcmp(name, "keycreate"))
- -              error = current_has_perm(p, PROCESS__SETKEYCREATE);
+ +              error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ +                                   PROCESS__SETKEYCREATE, NULL);
         else if (!strcmp(name, "sockcreate"))
- -              error = current_has_perm(p, PROCESS__SETSOCKCREATE);
+ +              error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ +                                   PROCESS__SETSOCKCREATE, NULL);
         else if (!strcmp(name, "current"))
- -              error = current_has_perm(p, PROCESS__SETCURRENT);
+ +              error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ +                                   PROCESS__SETCURRENT, NULL);
         else
                 error = -EINVAL;
         if (error)
                 return error;
   
         /* Obtain a SID for the context, if one was specified. */
- -      if (size && str[1] && str[1] != '\n') {
+ +      if (size && str[0] && str[0] != '\n') {
                 if (str[size-1] == '\n') {
                         str[size-1] = 0;
                         size--;
@@@ -5934,8 -5933,7 +5933,8 @@@
         } else if (!strcmp(name, "fscreate")) {
                 tsec->create_sid = sid;
         } else if (!strcmp(name, "keycreate")) {
- -              error = may_create_key(sid, p);
+ +              error = avc_has_perm(mysid, sid, SECCLASS_KEY, KEY__CREATE,
+ +                                   NULL);
                 if (error)
                         goto abort_change;
                 tsec->keycreate_sid = sid;
@@@ -5962,7 -5960,7 +5961,7 @@@
   
                 /* Check for ptracing, and update the task SID if ok.
                    Otherwise, leave SID unchanged and fail. */
- -              ptsid = ptrace_parent_sid(p);
+ +              ptsid = ptrace_parent_sid();
                 if (ptsid != 0) {
                         error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS,
                                              PROCESS__PTRACE, NULL);
@@@ -6210,6 -6208,7 +6209,6 @@@ static struct security_hook_list selinu
         LSM_HOOK_INIT(task_getscheduler, selinux_task_getscheduler),
         LSM_HOOK_INIT(task_movememory, selinux_task_movememory),
         LSM_HOOK_INIT(task_kill, selinux_task_kill),
- -      LSM_HOOK_INIT(task_wait, selinux_task_wait),
         LSM_HOOK_INIT(task_to_inode, selinux_task_to_inode),
   
         LSM_HOOK_INIT(ipc_permission, selinux_ipc_permission),
@@@ -6349,7 -6348,7 +6348,7 @@@ static __init int selinux_init(void
                                             0, SLAB_PANIC, NULL);
         avc_init();
   
- -      security_add_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks));
+ +      security_add_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks), "selinux");
   
         if (avc_add_callback(selinux_netcache_avc_callback, AVC_CALLBACK_RESET))
                 panic("SELinux: Unable to register AVC netcache callback\n");
diff --combined security/smack/smack_lsm.c

index 60b4217,bc2ff09..fc8fb31
--- 1/security/smack/smack_lsm.c
--- 2/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@@ -52,7 -52,6 +52,7 @@@
   #define SMK_SENDING   2
   
   #ifdef SMACK_IPV6_PORT_LABELING
+ +DEFINE_MUTEX(smack_ipv6_lock);
   static LIST_HEAD(smk_ipv6_port_list);
   #endif
   static struct kmem_cache *smack_inode_cache;
@@@ -348,6 -347,8 +348,6 @@@ static int smk_copy_rules(struct list_h
         struct smack_rule *orp;
         int rc = 0;
   
- -      INIT_LIST_HEAD(nhead);
- -
         list_for_each_entry_rcu(orp, ohead, list) {
                 nrp = kzalloc(sizeof(struct smack_rule), gfp);
                 if (nrp == NULL) {
@@@ -374,6 -375,8 +374,6 @@@ static int smk_copy_relabel(struct list
         struct smack_known_list_elem *nklep;
         struct smack_known_list_elem *oklep;
   
- -      INIT_LIST_HEAD(nhead);
- -
         list_for_each_entry(oklep, ohead, list) {
                 nklep = kzalloc(sizeof(struct smack_known_list_elem), gfp);
                 if (nklep == NULL) {
@@@ -931,7 -934,7 +931,7 @@@ static int smack_bprm_set_creds(struct 
             isp->smk_task != sbsp->smk_root)
                 return 0;
   
-       if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
+       if (bprm->unsafe & LSM_UNSAFE_PTRACE) {
                 struct task_struct *tracer;
                 rc = 0;
   
@@@ -1006,39 -1009,15 +1006,39 @@@ static int smack_inode_alloc_security(s
   }
   
   /**
- - * smack_inode_free_security - free an inode blob
+ + * smack_inode_free_rcu - Free inode_smack blob from cache
+ + * @head: the rcu_head for getting inode_smack pointer
+ + *
+ + *  Call back function called from call_rcu() to free
+ + *  the i_security blob pointer in inode
+ + */
+ +static void smack_inode_free_rcu(struct rcu_head *head)
+ +{
+ +      struct inode_smack *issp;
+ +
+ +      issp = container_of(head, struct inode_smack, smk_rcu);
+ +      kmem_cache_free(smack_inode_cache, issp);
+ +}
+ +
+ +/**
+ + * smack_inode_free_security - free an inode blob using call_rcu()
    * @inode: the inode with a blob
    *
- - * Clears the blob pointer in inode
+ + * Clears the blob pointer in inode using RCU
    */
   static void smack_inode_free_security(struct inode *inode)
   {
- -      kmem_cache_free(smack_inode_cache, inode->i_security);
- -      inode->i_security = NULL;
+ +      struct inode_smack *issp = inode->i_security;
+ +
+ +      /*
+ +       * The inode may still be referenced in a path walk and
+ +       * a call to smack_inode_permission() can be made
+ +       * after smack_inode_free_security() is called.
+ +       * To avoid race condition free the i_security via RCU
+ +       * and leave the current inode->i_security pointer intact.
+ +       * The inode will be freed after the RCU grace period too.
+ +       */
+ +      call_rcu(&issp->smk_rcu, smack_inode_free_rcu);
   }
   
   /**
@@@ -1647,9 -1626,6 +1647,9 @@@ static int smack_file_ioctl(struct fil
         struct smk_audit_info ad;
         struct inode *inode = file_inode(file);
   
+ +      if (unlikely(IS_PRIVATE(inode)))
+ +              return 0;
+ +
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
         smk_ad_setfield_u_fs_path(&ad, file->f_path);
   
@@@ -1679,9 -1655,6 +1679,9 @@@ static int smack_file_lock(struct file 
         int rc;
         struct inode *inode = file_inode(file);
   
+ +      if (unlikely(IS_PRIVATE(inode)))
+ +              return 0;
+ +
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
         smk_ad_setfield_u_fs_path(&ad, file->f_path);
         rc = smk_curacc(smk_of_inode(inode), MAY_LOCK, &ad);
@@@ -1708,9 -1681,6 +1708,9 @@@ static int smack_file_fcntl(struct fil
         int rc = 0;
         struct inode *inode = file_inode(file);
   
+ +      if (unlikely(IS_PRIVATE(inode)))
+ +              return 0;
+ +
         switch (cmd) {
         case F_GETLK:
                 break;
@@@ -1764,9 -1734,6 +1764,9 @@@ static int smack_mmap_file(struct file 
         if (file == NULL)
                 return 0;
   
+ +      if (unlikely(IS_PRIVATE(file_inode(file))))
+ +              return 0;
+ +
         isp = file_inode(file)->i_security;
         if (isp->smk_mmap == NULL)
                 return 0;
@@@ -1967,9 -1934,12 +1967,9 @@@ static int smack_file_open(struct file 
         struct smk_audit_info ad;
         int rc;
   
- -      if (smack_privileged(CAP_MAC_OVERRIDE))
- -              return 0;
- -
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
         smk_ad_setfield_u_fs_path(&ad, file->f_path);
- -      rc = smk_access(tsp->smk_task, smk_of_inode(inode), MAY_READ, &ad);
+ +      rc = smk_tskacc(tsp, smk_of_inode(inode), MAY_READ, &ad);
         rc = smk_bu_credfile(cred, file, MAY_READ, rc);
   
         return rc;
@@@ -2301,6 -2271,25 +2301,6 @@@ static int smack_task_kill(struct task_
         return rc;
   }
   
- -/**
- - * smack_task_wait - Smack access check for waiting
- - * @p: task to wait for
- - *
- - * Returns 0
- - */
- -static int smack_task_wait(struct task_struct *p)
- -{
- -      /*
- -       * Allow the operation to succeed.
- -       * Zombies are bad.
- -       * In userless environments (e.g. phones) programs
- -       * get marked with SMACK64EXEC and even if the parent
- -       * and child shouldn't be talking the parent still
- -       * may expect to know when the child exits.
- -       */
- -      return 0;
- -}
- -
   /**
    * smack_task_to_inode - copy task smack into the inode blob
    * @p: task to copy from
@@@ -2364,20 -2353,6 +2364,20 @@@ static int smack_sk_alloc_security(stru
    */
   static void smack_sk_free_security(struct sock *sk)
   {
+ +#ifdef SMACK_IPV6_PORT_LABELING
+ +      struct smk_port_label *spp;
+ +
+ +      if (sk->sk_family == PF_INET6) {
+ +              rcu_read_lock();
+ +              list_for_each_entry_rcu(spp, &smk_ipv6_port_list, list) {
+ +                      if (spp->smk_sock != sk)
+ +                              continue;
+ +                      spp->smk_can_reuse = 1;
+ +                      break;
+ +              }
+ +              rcu_read_unlock();
+ +      }
+ +#endif
         kfree(sk->sk_security);
   }
   
@@@ -2628,20 -2603,17 +2628,20 @@@ static void smk_ipv6_port_label(struct 
                  * on the bound socket. Take the changes to the port
                  * as well.
                  */
- -              list_for_each_entry(spp, &smk_ipv6_port_list, list) {
+ +              rcu_read_lock();
+ +              list_for_each_entry_rcu(spp, &smk_ipv6_port_list, list) {
                         if (sk != spp->smk_sock)
                                 continue;
                         spp->smk_in = ssp->smk_in;
                         spp->smk_out = ssp->smk_out;
+ +                      rcu_read_unlock();
                         return;
                 }
                 /*
                  * A NULL address is only used for updating existing
                  * bound entries. If there isn't one, it's OK.
                  */
+ +              rcu_read_unlock();
                 return;
         }
   
@@@ -2657,23 -2629,16 +2657,23 @@@
          * Look for an existing port list entry.
          * This is an indication that a port is getting reused.
          */
- -      list_for_each_entry(spp, &smk_ipv6_port_list, list) {
- -              if (spp->smk_port != port)
+ +      rcu_read_lock();
+ +      list_for_each_entry_rcu(spp, &smk_ipv6_port_list, list) {
+ +              if (spp->smk_port != port || spp->smk_sock_type != sock->type)
                         continue;
+ +              if (spp->smk_can_reuse != 1) {
+ +                      rcu_read_unlock();
+ +                      return;
+ +              }
                 spp->smk_port = port;
                 spp->smk_sock = sk;
                 spp->smk_in = ssp->smk_in;
                 spp->smk_out = ssp->smk_out;
+ +              spp->smk_can_reuse = 0;
+ +              rcu_read_unlock();
                 return;
         }
- -
+ +      rcu_read_unlock();
         /*
          * A new port entry is required.
          */
@@@ -2685,12 -2650,8 +2685,12 @@@
         spp->smk_sock = sk;
         spp->smk_in = ssp->smk_in;
         spp->smk_out = ssp->smk_out;
+ +      spp->smk_sock_type = sock->type;
+ +      spp->smk_can_reuse = 0;
   
- -      list_add(&spp->list, &smk_ipv6_port_list);
+ +      mutex_lock(&smack_ipv6_lock);
+ +      list_add_rcu(&spp->list, &smk_ipv6_port_list);
+ +      mutex_unlock(&smack_ipv6_lock);
         return;
   }
   
@@@ -2741,16 -2702,14 +2741,16 @@@ static int smk_ipv6_port_check(struct s
                 return 0;
   
         port = ntohs(address->sin6_port);
- -      list_for_each_entry(spp, &smk_ipv6_port_list, list) {
- -              if (spp->smk_port != port)
+ +      rcu_read_lock();
+ +      list_for_each_entry_rcu(spp, &smk_ipv6_port_list, list) {
+ +              if (spp->smk_port != port || spp->smk_sock_type != sk->sk_type)
                         continue;
                 object = spp->smk_in;
                 if (act == SMK_CONNECTING)
                         ssp->smk_packet = spp->smk_out;
                 break;
         }
+ +      rcu_read_unlock();
   
         return smk_ipv6_check(skp, object, address, act);
   }
@@@ -3479,13 -3438,6 +3479,13 @@@ static void smack_d_instantiate(struct 
                 case PIPEFS_MAGIC:
                         isp->smk_inode = smk_of_current();
                         break;
+ +              case SOCKFS_MAGIC:
+ +                      /*
+ +                       * Socket access is controlled by the socket
+ +                       * structures associated with the task involved.
+ +                       */
+ +                      isp->smk_inode = &smack_known_star;
+ +                      break;
                 default:
                         isp->smk_inode = sbsp->smk_root;
                         break;
@@@ -3502,12 -3454,19 +3502,12 @@@
          */
         switch (sbp->s_magic) {
         case SMACK_MAGIC:
- -      case PIPEFS_MAGIC:
- -      case SOCKFS_MAGIC:
         case CGROUP_SUPER_MAGIC:
                 /*
                  * Casey says that it's a little embarrassing
                  * that the smack file system doesn't do
                  * extended attributes.
                  *
- -               * Casey says pipes are easy (?)
- -               *
- -               * Socket access is controlled by the socket
- -               * structures associated with the task involved.
- -               *
                  * Cgroupfs is special
                  */
                 final = &smack_known_star;
@@@ -3661,6 -3620,7 +3661,6 @@@ static int smack_getprocattr(struct tas
   
   /**
    * smack_setprocattr - Smack process attribute setting
- - * @p: the object task
    * @name: the name of the attribute in /proc/.../attr
    * @value: the value to set
    * @size: the size of the value
@@@ -3670,7 -3630,8 +3670,7 @@@
    *
    * Returns the length of the smack label or an error code
    */
- -static int smack_setprocattr(struct task_struct *p, char *name,
- -                           void *value, size_t size)
+ +static int smack_setprocattr(const char *name, void *value, size_t size)
   {
         struct task_smack *tsp = current_security();
         struct cred *new;
@@@ -3678,6 -3639,13 +3678,6 @@@
         struct smack_known_list_elem *sklep;
         int rc;
   
- -      /*
- -       * Changing another process' Smack value is too dangerous
- -       * and supports no sane use case.
- -       */
- -      if (p != current)
- -              return -EPERM;
- -
         if (!smack_privileged(CAP_MAC_ADMIN) && list_empty(&tsp->smk_relabel))
                 return -EPERM;
   
@@@ -3881,7 -3849,7 +3881,7 @@@ static struct smack_known *smack_from_s
                  * ambient value.
                  */
                 rcu_read_lock();
- -              list_for_each_entry(skp, &smack_known_list, list) {
+ +              list_for_each_entry_rcu(skp, &smack_known_list, list) {
                         if (sap->attr.mls.lvl != skp->smk_netlabel.attr.mls.lvl)
                                 continue;
                         /*
@@@ -4699,6 -4667,7 +4699,6 @@@ static struct security_hook_list smack_
         LSM_HOOK_INIT(task_getscheduler, smack_task_getscheduler),
         LSM_HOOK_INIT(task_movememory, smack_task_movememory),
         LSM_HOOK_INIT(task_kill, smack_task_kill),
- -      LSM_HOOK_INIT(task_wait, smack_task_wait),
         LSM_HOOK_INIT(task_to_inode, smack_task_to_inode),
   
         LSM_HOOK_INIT(ipc_permission, smack_ipc_permission),
@@@ -4850,7 -4819,7 +4850,7 @@@ static __init int smack_init(void
         /*
          * Register with LSM
          */
- -      security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks));
+ +      security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks), "smack");
   
         return 0;
   }
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 24 Feb 2017 04:33:51 +0000 (20:33 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 24 Feb 2017 04:33:51 +0000 (20:33 -0800)
		1	2
fs/debugfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/proc/base.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/debugfs.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fsnotify_backend.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/security.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/exit.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/fork.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sys.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/ucount.c	patch \|	diff1 \|	diff2 \|	blob \| history
security/apparmor/domain.c	patch \|	diff1 \|	diff2 \|	blob \| history
security/commoncap.c	patch \|	diff1 \|	diff2 \|	blob \| history
security/selinux/hooks.c	patch \|	diff1 \|	diff2 \|	blob \| history
security/smack/smack_lsm.c	patch \|	diff1 \|	diff2 \|	blob \| history