Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 29 Jun 2021 03:39:26 +0000 (20:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 29 Jun 2021 03:39:26 +0000 (20:39 -0700)
Pull user namespace rlimit handling update from Eric Biederman:
 "This is the work mainly by Alexey Gladkov to limit rlimits to the
  rlimits of the user that created a user namespace, and to allow users
  to have stricter limits on the resources created within a user
  namespace."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  cred: add missing return error code when set_cred_ucounts() failed
  ucounts: Silence warning in dec_rlimit_ucounts
  ucounts: Set ucount_max to the largest positive value the type can hold
  kselftests: Add test to check for rlimit changes in different user namespaces
  Reimplement RLIMIT_MEMLOCK on top of ucounts
  Reimplement RLIMIT_SIGPENDING on top of ucounts
  Reimplement RLIMIT_MSGQUEUE on top of ucounts
  Reimplement RLIMIT_NPROC on top of ucounts
  Use atomic_t for ucounts reference counting
  Add a reference to ucounts for each cred
  Increase size of ucounts to atomic_long_t

19 files changed:
1  2 
fs/hugetlbfs/inode.c
fs/proc/array.c
include/linux/cred.h
include/linux/hugetlb.h
include/linux/mm.h
include/linux/sched/user.h
include/linux/user_namespace.h
ipc/mqueue.c
kernel/cred.c
kernel/exit.c
kernel/fork.c
kernel/signal.c
kernel/sys.c
kernel/ucount.c
kernel/user_namespace.c
mm/mlock.c
mm/mmap.c
mm/shmem.c
tools/testing/selftests/Makefile

Simple merge
diff --cc fs/proc/array.c
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -12,8 -12,9 +12,6 @@@
   */
  struct user_struct {
        refcount_t __count;     /* reference count */
-       atomic_t processes;     /* How many processes does this user have? */
-       atomic_t sigpending;    /* How many pending signals does this user have? */
 -#ifdef CONFIG_FANOTIFY
 -      atomic_t fanotify_listeners;
 -#endif
  #ifdef CONFIG_EPOLL
        atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
  #endif
@@@ -49,11 -49,11 +49,15 @@@ enum ucount_type 
  #ifdef CONFIG_INOTIFY_USER
        UCOUNT_INOTIFY_INSTANCES,
        UCOUNT_INOTIFY_WATCHES,
 +#endif
 +#ifdef CONFIG_FANOTIFY
 +      UCOUNT_FANOTIFY_GROUPS,
 +      UCOUNT_FANOTIFY_MARKS,
  #endif
+       UCOUNT_RLIMIT_NPROC,
+       UCOUNT_RLIMIT_MSGQUEUE,
+       UCOUNT_RLIMIT_SIGPENDING,
+       UCOUNT_RLIMIT_MEMLOCK,
        UCOUNT_COUNTS,
  };
  
diff --cc ipc/mqueue.c
Simple merge
diff --cc kernel/cred.c
Simple merge
diff --cc kernel/exit.c
Simple merge
diff --cc kernel/fork.c
@@@ -2388,10 -2386,10 +2392,10 @@@ bad_fork_cleanup_threadgroup_lock
  #endif
        delayacct_tsk_free(p);
  bad_fork_cleanup_count:
-       atomic_dec(&p->cred->user->processes);
+       dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
        exit_creds(p);
  bad_fork_free:
 -      p->state = TASK_DEAD;
 +      WRITE_ONCE(p->__state, TASK_DEAD);
        put_task_stack(p);
        delayed_free_task(p);
  fork_out:
diff --cc kernel/signal.c
@@@ -408,12 -410,11 +408,12 @@@ void task_join_group_stop(struct task_s
   *   appropriate lock must be held to stop the target task from exiting
   */
  static struct sigqueue *
 -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
 +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 +               int override_rlimit, const unsigned int sigqueue_flags)
  {
        struct sigqueue *q = NULL;
-       struct user_struct *user;
-       int sigpending;
+       struct ucounts *ucounts = NULL;
+       long sigpending;
  
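        /*
         * Protect access to @t credentials. This can go away when all
         * callers hold rcu read lock.
         *
         * NOTE! A pending signal will hold on to the user refcount,
         * and we get/put the refcount only when the sigpending count
         * changes from/to zero.
         */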
        rcu_read_lock();
-       user = __task_cred(t)->user;
-       sigpending = atomic_inc_return(&user->sigpending);
+       ucounts = task_ucounts(t);
+       sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
        if (sigpending == 1)
-               get_uid(user);
+               ucounts = get_ucounts(ucounts);
        rcu_read_unlock();
  
-       if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
+       if (override_rlimit || (sigpending < LONG_MAX && sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
 -              q = kmem_cache_alloc(sigqueue_cachep, flags);
 +              q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
        } else {
                print_dropped_signal(sig);
        }
  
        if (unlikely(q == NULL)) {
-               if (atomic_dec_and_test(&user->sigpending))
-                       free_uid(user);
+               if (ucounts && dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
+                       put_ucounts(ucounts);
        } else {
                INIT_LIST_HEAD(&q->list);
 -              q->flags = 0;
 +              q->flags = sigqueue_flags;
-               q->user = user;
+               q->ucounts = ucounts;
        }
        return q;
  }
  
diff --cc kernel/sys.c
Simple merge
diff --cc kernel/ucount.c
@@@ -73,11 -79,11 +79,15 @@@ static struct ctl_table user_table[] = 
  #ifdef CONFIG_INOTIFY_USER
        UCOUNT_ENTRY("max_inotify_instances"),
        UCOUNT_ENTRY("max_inotify_watches"),
 +#endif
 +#ifdef CONFIG_FANOTIFY
 +      UCOUNT_ENTRY("max_fanotify_groups"),
 +      UCOUNT_ENTRY("max_fanotify_marks"),
  #endif
+       { },
+       { },
+       { },
+       { },
        { }
  };
  #endif /* CONFIG_SYSCTL */
Simple merge
diff --cc mm/mlock.c
Simple merge
diff --cc mm/mmap.c
Simple merge
diff --cc mm/shmem.c
Simple merge
Simple merge