static struct vfsmount *debugfs_automount(struct path *path)
{
- struct vfsmount *(*f)(void *);
- f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
- return f(d_inode(path->dentry)->i_private);
+	struct dentry *mountpoint = path->dentry;
+	/* ->d_fsdata carries the automount callback for this dentry. */
+	debugfs_automount_t callback = (debugfs_automount_t)mountpoint->d_fsdata;
+
+	/* Hand the callback both the dentry and the inode's private data. */
+	return callback(mountpoint, d_inode(mountpoint)->i_private);
}
static const struct dentry_operations debugfs_dops = {
};
MODULE_ALIAS_FS("debugfs");
+/**
+ * debugfs_lookup() - find an already existing debugfs entry by name
+ * @name: a pointer to a NUL-terminated string holding the name to look up.
+ * @parent: a pointer to the dentry of the directory to search.  %NULL
+ *          selects the root of the debugfs mount.
+ *
+ * Return: the dentry of the entry when it exists.  %NULL is returned when
+ * @parent is an error pointer, when the lookup fails, or when the name
+ * resolves to a negative dentry (no such entry).  A non-%NULL result must
+ * be passed to dput() once the caller is done with it.
+ *
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
+{
+	struct dentry *dir = parent;
+	struct dentry *found;
+
+	if (IS_ERR(dir))
+		return NULL;
+	if (!dir)
+		dir = debugfs_mount->mnt_root;
+
+	/* The name lookup runs with the directory inode locked. */
+	inode_lock(d_inode(dir));
+	found = lookup_one_len(name, dir, strlen(name));
+	inode_unlock(d_inode(dir));
+
+	if (IS_ERR(found))
+		return NULL;
+	if (d_really_is_positive(found))
+		return found;
+
+	/* Negative dentry: nothing by that name, so release it again. */
+	dput(found);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(debugfs_lookup);
+
static struct dentry *start_creating(const char *name, struct dentry *parent)
{
struct dentry *dentry;
*/
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
- struct vfsmount *(*f)(void *),
+ debugfs_automount_t f,
void *data)
{
struct dentry *dentry = start_creating(name, parent);
/* building an inode */
+/*
+ * task_dump_owner - compute who should own a proc file for a task
+ * @task: task the proc file belongs to
+ * @mode: mode of the proc file
+ * @ruid: where the resulting owner uid is stored
+ * @rgid: where the resulting owner gid is stored
+ *
+ * The answer depends on the dumpable state of the task's mm: the task's
+ * effective ids are used unless the checks below replace them with a
+ * root id.
+ */
+void task_dump_owner(struct task_struct *task, mode_t mode,
+		     kuid_t *ruid, kgid_t *rgid)
+{
+	const struct cred *cred;
+	kuid_t owner_uid;
+	kgid_t owner_gid;
+
+	/* Start out with the task's effective ownership. */
+	rcu_read_lock();
+	cred = __task_cred(task);
+	owner_uid = cred->euid;
+	owner_gid = cred->egid;
+	rcu_read_unlock();
+
+	/*
+	 * Before /proc/pid/status existed, stat()ing /proc/pid was the only
+	 * way to read a process's effective uid.  Reading /proc/pid/status
+	 * is slow enough that procps and other packages kept stat()ing
+	 * /proc/pid, so to keep the /proc rules simple every per-process
+	 * world readable and executable directory keeps the effective ids
+	 * and skips the dumpable check below.
+	 */
+	if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
+		struct mm_struct *mm;
+
+		task_lock(task);
+		mm = task->mm;
+		if (!mm) {
+			/* No mm: owned by global root. */
+			owner_uid = GLOBAL_ROOT_UID;
+			owner_gid = GLOBAL_ROOT_GID;
+		} else if (get_dumpable(mm) != SUID_DUMP_USER) {
+			/*
+			 * Non-dumpable: owned by id 0 of the mm's user
+			 * namespace, or by global root when that id does
+			 * not map to a valid kuid/kgid.
+			 */
+			struct user_namespace *user_ns = mm->user_ns;
+
+			owner_uid = make_kuid(user_ns, 0);
+			if (!uid_valid(owner_uid))
+				owner_uid = GLOBAL_ROOT_UID;
+
+			owner_gid = make_kgid(user_ns, 0);
+			if (!gid_valid(owner_gid))
+				owner_gid = GLOBAL_ROOT_GID;
+		}
+		task_unlock(task);
+	}
+
+	*ruid = owner_uid;
+	*rgid = owner_gid;
+}
+
struct inode *proc_pid_make_inode(struct super_block * sb,
struct task_struct *task, umode_t mode)
{
struct inode * inode;
struct proc_inode *ei;
- const struct cred *cred;
/* We need a new inode */
if (!ei->pid)
goto out_unlock;
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- }
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
security_task_to_inode(task, inode);
out:
{
struct inode *inode = d_inode(dentry);
struct task_struct *task;
- const struct cred *cred;
struct pid_namespace *pid = dentry->d_sb->s_fs_info;
generic_fillattr(inode, stat);
*/
return -ENOENT;
}
- if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
- task_dumpable(task)) {
- cred = __task_cred(task);
- stat->uid = cred->euid;
- stat->gid = cred->egid;
- }
+ task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
}
rcu_read_unlock();
return 0;
* Rewrite the inode's ownerships here because the owning task may have
* performed a setuid(), etc.
*
- * Before the /proc/pid/status file was created the only way to read
- * the effective uid of a /process was to stat /proc/pid. Reading
- * /proc/pid/status is slow enough that procps and other packages
- * kept stating /proc/pid. To keep the rules in /proc simple I have
- * made this apply to all per process world readable and executable
- * directories.
*/
int pid_revalidate(struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
struct task_struct *task;
- const struct cred *cred;
if (flags & LOOKUP_RCU)
return -ECHILD;
task = get_proc_task(inode);
if (task) {
- if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
- task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
+ task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
+
inode->i_mode &= ~(S_ISUID | S_ISGID);
security_task_to_inode(task, inode);
put_task_struct(task);
bool exact_vma_exists = false;
struct mm_struct *mm = NULL;
struct task_struct *task;
- const struct cred *cred;
struct inode *inode;
int status = 0;
mmput(mm);
if (exact_vma_exists) {
- if (task_dumpable(task)) {
- rcu_read_lock();
- cred = __task_cred(task);
- inode->i_uid = cred->euid;
- inode->i_gid = cred->egid;
- rcu_read_unlock();
- } else {
- inode->i_uid = GLOBAL_ROOT_UID;
- inode->i_gid = GLOBAL_ROOT_GID;
- }
+ task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
+
security_task_to_inode(task, inode);
status = 1;
}
.llseek = generic_file_llseek,
};
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
struct timers_private {
struct pid *pid;
struct task_struct *task;
length = -ESRCH;
if (!task)
goto out_no_task;
+
+ /* A task may only write its own attributes. */
+ length = -EACCES;
+ if (current != task)
+ goto out;
+
if (count > PAGE_SIZE)
count = PAGE_SIZE;
}
/* Guard against adverse ptrace interaction */
- length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
+ length = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
if (length < 0)
goto out_free;
- length = security_setprocattr(task,
- (char*)file->f_path.dentry->d_name.name,
+ length = security_setprocattr(file->f_path.dentry->d_name.name,
page, count);
- mutex_unlock(&task->signal->cred_guard_mutex);
+ mutex_unlock(&current->signal->cred_guard_mutex);
out_free:
kfree(page);
out:
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
-#ifdef CONFIG_CHECKPOINT_RESTORE
+#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
REG("timers", S_IRUGO, proc_timers_operations),
#endif
REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
iter.tgid += 1, iter = next_tgid(ns, iter)) {
char name[PROC_NUMBUF];
int len;
+
+ cond_resched();
if (!has_pid_permissions(ns, iter.task, 2))
continue;
struct super_block *old;
int err;
- if (!(flags & MS_KERNMOUNT) &&
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) &&
!(type->fs_flags & FS_USERNS_MOUNT) &&
!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
}
if (!s) {
spin_unlock(&sb_lock);
- s = alloc_super(type, flags, user_ns);
+ s = alloc_super(type, (flags & ~MS_SUBMOUNT), user_ns);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
{
struct user_namespace *user_ns = current_user_ns();
+ /* We don't yet pass the user namespace of the parent
+ * mount through to here so always use &init_user_ns
+ * until that changes.
+ */
+ if (flags & MS_SUBMOUNT)
+ user_ns = &init_user_ns;
+
/* Ensure the requestor has permissions over the target filesystem */
- if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, CAP_SYS_ADMIN))
+ if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && !ns_capable(user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
return sget_userns(type, test, set, flags, user_ns, data);
* We set the bdi here to the queue backing, file systems can
* overwrite this in ->fill_super()
*/
- s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info;
+ s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info;
return 0;
}
* Must only be called under the protection established by
* debugfs_use_file_start().
*/
-static inline const struct file_operations *
-debugfs_real_fops(const struct file *filp)
+static inline const struct file_operations *debugfs_real_fops(const struct file *filp)
__must_hold(&debugfs_srcu)
{
/*
#if defined(CONFIG_DEBUG_FS)
+struct dentry *debugfs_lookup(const char *name, struct dentry *parent);
+
struct dentry *debugfs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops);
struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
const char *dest);
+ typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *);
struct dentry *debugfs_create_automount(const char *name,
struct dentry *parent,
- struct vfsmount *(*f)(void *),
+ debugfs_automount_t f,
void *data);
void debugfs_remove(struct dentry *dentry);
* want to duplicate the design decision mistakes of procfs and devfs again.
*/
+/*
+ * Stub variant of debugfs_lookup(): performs no lookup and always returns
+ * ERR_PTR(-ENODEV), so callers must check the result with IS_ERR() rather
+ * than only testing for NULL.
+ */
+static inline struct dentry *debugfs_lookup(const char *name,
+ struct dentry *parent)
+{
+ return ERR_PTR(-ENODEV);
+}
+
static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops)
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
+ #include <linux/user_namespace.h>
/*
* IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
struct inotify_group_private_data {
spinlock_t idr_lock;
struct idr idr;
- struct user_struct *user;
+ struct ucounts *ucounts;
} inotify_data;
#endif
#ifdef CONFIG_FANOTIFY
extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode);
/* find (and take a reference) to a mark associated with group and vfsmount */
extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt);
-/* copy the values from old into new */
-extern void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old);
/* set the ignored_mask of a mark */
extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask);
/* set the mask of a mark (might pin the object into memory */
#include <asm/page.h>
#include <asm/ptrace.h>
-#include <linux/cputime.h>
#include <linux/smp.h>
#include <linux/sem.h>
extern char ___assert_task_state[1 - 2*!!(
sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
-/* Convenience macros for the sake of set_task_state */
+/* Convenience macros for the sake of set_current_state */
#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-#define __set_task_state(tsk, state_value) \
- do { \
- (tsk)->task_state_change = _THIS_IP_; \
- (tsk)->state = (state_value); \
- } while (0)
-#define set_task_state(tsk, state_value) \
- do { \
- (tsk)->task_state_change = _THIS_IP_; \
- smp_store_mb((tsk)->state, (state_value)); \
- } while (0)
-
#define __set_current_state(state_value) \
do { \
current->task_state_change = _THIS_IP_; \
} while (0)
#else
-
-/*
- * @tsk had better be current, or you get to keep the pieces.
- *
- * The only reason is that computing current can be more expensive than
- * using a pointer that's already available.
- *
- * Therefore, see set_current_state().
- */
-#define __set_task_state(tsk, state_value) \
- do { (tsk)->state = (state_value); } while (0)
-#define set_task_state(tsk, state_value) \
- smp_store_mb((tsk)->state, (state_value))
-
/*
* set_current_state() includes a barrier so that the write of current->state
* is correctly serialised wrt the caller's subsequent test of whether to
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
+extern int __must_check io_schedule_prepare(void);
+extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
-
-static inline void io_schedule(void)
-{
- io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
-}
+extern void io_schedule(void);
void __noreturn do_task_dead(void);
int ac_flag;
long ac_exitcode;
unsigned long ac_mem;
- cputime_t ac_utime, ac_stime;
+ u64 ac_utime, ac_stime;
unsigned long ac_minflt, ac_majflt;
};
struct cpu_itimer {
- cputime_t expires;
- cputime_t incr;
- u32 error;
- u32 incr_error;
+ u64 expires;
+ u64 incr;
};
/**
*/
struct prev_cputime {
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- cputime_t utime;
- cputime_t stime;
+ u64 utime;
+ u64 stime;
raw_spinlock_t lock;
#endif
};
/**
* struct task_cputime - collected CPU time counts
- * @utime: time spent in user mode, in &cputime_t units
- * @stime: time spent in kernel mode, in &cputime_t units
+ * @utime: time spent in user mode, in nanoseconds
+ * @stime: time spent in kernel mode, in nanoseconds
* @sum_exec_runtime: total time spent on the CPU, in nanoseconds
*
* This structure groups together three kinds of CPU time that are tracked for
* these counts together and treat all three of them in parallel.
*/
struct task_cputime {
- cputime_t utime;
- cputime_t stime;
+ u64 utime;
+ u64 stime;
unsigned long long sum_exec_runtime;
};
#define prof_exp stime
#define sched_exp sum_exec_runtime
-#define INIT_CPUTIME \
- (struct task_cputime) { \
- .utime = 0, \
- .stime = 0, \
- .sum_exec_runtime = 0, \
- }
-
/*
* This is the atomic variant of task_cputime, which can be used for
* storing and updating task_cputime statistics without locking.
unsigned int is_child_subreaper:1;
unsigned int has_child_subreaper:1;
+#ifdef CONFIG_POSIX_TIMERS
+
/* POSIX.1b Interval Timers */
int posix_timer_id;
struct list_head posix_timers;
/* ITIMER_REAL timer for the process */
struct hrtimer real_timer;
- struct pid *leader_pid;
ktime_t it_real_incr;
/*
/* Earliest-expiration cache. */
struct task_cputime cputime_expires;
+ struct list_head cpu_timers[3];
+
+#endif
+
+ struct pid *leader_pid;
+
#ifdef CONFIG_NO_HZ_FULL
atomic_t tick_dep_mask;
#endif
- struct list_head cpu_timers[3];
-
struct pid *tty_old_pgrp;
/* boolean value for session group leader */
* in __exit_signal, except for the group leader.
*/
seqlock_t stats_lock;
- cputime_t utime, stime, cutime, cstime;
- cputime_t gtime;
- cputime_t cgtime;
+ u64 utime, stime, cutime, cstime;
+ u64 gtime;
+ u64 cgtime;
struct prev_cputime prev_cputime;
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */
+#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
+ SIGNAL_STOP_CONTINUED)
+
+/*
+ * Replace the bits covered by SIGNAL_STOP_MASK in @sig->flags with @flags,
+ * leaving every other bit untouched.  Warns when @sig already has
+ * SIGNAL_GROUP_EXIT or SIGNAL_GROUP_COREDUMP set.
+ */
+static inline void signal_set_stop_flags(struct signal_struct *sig,
+					 unsigned int flags)
+{
+	unsigned int kept;
+
+	WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
+
+	kept = sig->flags & ~SIGNAL_STOP_MASK;
+	sig->flags = kept | flags;
+}
+
/* If true, all threads except ->group_exit_task have pending SIGKILL */
static inline int signal_group_exit(const struct signal_struct *sig)
{
atomic_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
- #ifdef CONFIG_INOTIFY_USER
- atomic_t inotify_watches; /* How many inotify watches does this user have? */
- atomic_t inotify_devs; /* How many inotify devs does this user have opened? */
- #endif
#ifdef CONFIG_FANOTIFY
atomic_t fanotify_listeners;
#endif
*
* The DEFINE_WAKE_Q macro declares and initializes the list head.
* wake_up_q() does NOT reinitialize the list; it's expected to be
- * called near the end of a function, where the fact that the queue is
- * not used again will be easy to see by inspection.
+ * called near the end of a function. Otherwise, the list can be
+ * re-initialized for later re-use by wake_q_init().
*
* Note that this can cause spurious wakeups. schedule() callers
* must ensure the call is done inside a loop, confirming that the
#define DEFINE_WAKE_Q(name) \
struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+/*
+ * Put @head back into the state DEFINE_WAKE_Q() gives a fresh queue:
+ * ->first is WAKE_Q_TAIL and ->lastp points at ->first.
+ */
+static inline void wake_q_init(struct wake_q_head *head)
+{
+	head->lastp = &head->first;
+	head->first = WAKE_Q_TAIL;
+}
+
extern void wake_q_add(struct wake_q_head *head,
struct task_struct *task);
extern void wake_up_q(struct wake_q_head *head);
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
- cputime_t utime, stime;
+ u64 utime, stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
- cputime_t utimescaled, stimescaled;
+ u64 utimescaled, stimescaled;
#endif
- cputime_t gtime;
+ u64 gtime;
struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqcount_t vtime_seqcount;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt;
+#ifdef CONFIG_POSIX_TIMERS
struct task_cputime cputime_expires;
struct list_head cpu_timers[3];
+#endif
/* process credentials */
const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
#if defined(CONFIG_TASK_XACCT)
u64 acct_rss_mem1; /* accumulated rss usage */
u64 acct_vm_mem1; /* accumulated virtual memory usage */
- cputime_t acct_timexpd; /* stime + utime since last update */
+ u64 acct_timexpd; /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
nodemask_t mems_allowed; /* Protected by alloc_lock */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
- cputime_t *utime, cputime_t *stime);
-extern cputime_t task_gtime(struct task_struct *t);
+ u64 *utime, u64 *stime);
+extern u64 task_gtime(struct task_struct *t);
#else
static inline void task_cputime(struct task_struct *t,
- cputime_t *utime, cputime_t *stime)
+ u64 *utime, u64 *stime)
{
*utime = t->utime;
*stime = t->stime;
}
-static inline cputime_t task_gtime(struct task_struct *t)
+static inline u64 task_gtime(struct task_struct *t)
{
return t->gtime;
}
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
static inline void task_cputime_scaled(struct task_struct *t,
- cputime_t *utimescaled,
- cputime_t *stimescaled)
+ u64 *utimescaled,
+ u64 *stimescaled)
{
*utimescaled = t->utimescaled;
*stimescaled = t->stimescaled;
}
#else
static inline void task_cputime_scaled(struct task_struct *t,
- cputime_t *utimescaled,
- cputime_t *stimescaled)
+ u64 *utimescaled,
+ u64 *stimescaled)
{
task_cputime(t, utimescaled, stimescaled);
}
#endif
-extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
-extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
+extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
+extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
/*
* Per process flags
extern void sched_clock_init(void);
#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static inline void sched_clock_init_late(void)
+{
+}
+
static inline void sched_clock_tick(void)
{
}
+static inline void clear_sched_clock_stable(void)
+{
+}
+
static inline void sched_clock_idle_sleep_event(void)
{
}
return sched_clock();
}
#else
+extern void sched_clock_init_late(void);
/*
* Architectures can set this to 1 if they have specified
* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
* is reliable after all:
*/
extern int sched_clock_stable(void);
-extern void set_sched_clock_stable(void);
extern void clear_sched_clock_stable(void);
extern void sched_clock_tick(void);
#define for_each_process_thread(p, t) \
for_each_process(p) for_each_thread(p, t)
+ typedef int (*proc_visitor)(struct task_struct *p, void *data);
+ void walk_process_tree(struct task_struct *top, proc_visitor, void *);
+
static inline int get_nr_threads(struct task_struct *tsk)
{
return tsk->signal->nr_threads;
/* bprm->unsafe reasons */
#define LSM_UNSAFE_SHARE 1
#define LSM_UNSAFE_PTRACE 2
- #define LSM_UNSAFE_PTRACE_CAP 4
- #define LSM_UNSAFE_NO_NEW_PRIVS 8
+ #define LSM_UNSAFE_NO_NEW_PRIVS 4
#ifdef CONFIG_MMU
extern int mmap_min_addr_handler(struct ctl_table *table, int write,
int security_task_movememory(struct task_struct *p);
int security_task_kill(struct task_struct *p, struct siginfo *info,
int sig, u32 secid);
-int security_task_wait(struct task_struct *p);
int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5);
void security_task_to_inode(struct task_struct *p, struct inode *inode);
unsigned nsops, int alter);
void security_d_instantiate(struct dentry *dentry, struct inode *inode);
int security_getprocattr(struct task_struct *p, char *name, char **value);
-int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size);
+int security_setprocattr(const char *name, void *value, size_t size);
int security_netlink_send(struct sock *sk, struct sk_buff *skb);
int security_ismaclabel(const char *name);
int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
return 0;
}
-static inline int security_task_wait(struct task_struct *p)
-{
- return 0;
-}
-
static inline int security_task_prctl(int option, unsigned long arg2,
unsigned long arg3,
unsigned long arg4,
return -EINVAL;
}
-static inline int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
+/*
+ * Stub variant of security_setprocattr(): no attribute is written and the
+ * call always fails with -EINVAL.  @name is const-qualified to match the
+ * non-stub prototype, so callers that pass a const name (such as a dentry
+ * name) build without discarding qualifiers against either variant.
+ */
+static inline int security_setprocattr(const char *name, void *value, size_t size)
{
return -EINVAL;
}
#include <linux/tty.h>
#include <linux/iocontext.h>
#include <linux/key.h>
-#include <linux/security.h>
#include <linux/cpu.h>
#include <linux/acct.h>
#include <linux/tsacct_kern.h>
#include <linux/shm.h>
#include <linux/kcov.h>
#include <linux/random.h>
+#include <linux/rcuwait.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
bool group_dead = thread_group_leader(tsk);
struct sighand_struct *sighand;
struct tty_struct *uninitialized_var(tty);
- cputime_t utime, stime;
+ u64 utime, stime;
sighand = rcu_dereference_check(tsk->sighand,
lockdep_tasklist_lock_is_held());
return task;
}
+/*
+ * rcuwait_wake_up - wake up the task waiting on @w, if there is one
+ * @w: the rcuwait whose ->task should be woken
+ *
+ * The caller is expected to have made the wakeup condition true before
+ * calling this.  ->task is read under rcu_read_lock(); nothing happens
+ * when it is NULL.
+ */
+void rcuwait_wake_up(struct rcuwait *w)
+{
+	struct task_struct *task;
+
+	rcu_read_lock();
+
+	/*
+	 * Order condition vs @task, such that everything prior to the load
+	 * of @task is visible. This is the condition as to why the user called
+	 * rcuwait_wake_up() in the first place. Pairs with set_current_state()
+	 * barrier (A) in rcuwait_wait_event().
+	 *
+	 *    WAIT                WAKE
+	 *    [S] tsk = current   [S] cond = true
+	 *        MB (A)              MB (B)
+	 *    [L] cond            [L] tsk
+	 *
+	 * (B) has to order the earlier store of the condition against the
+	 * later load of @task.  Only a full barrier gives store->load
+	 * ordering; smp_rmb() orders loads against loads and is not enough.
+	 */
+	smp_mb(); /* (B) */
+
+	/*
+	 * Avoid using task_rcu_dereference() magic as long as we are careful,
+	 * see comment in rcuwait_wait_event() regarding ->exit_state.
+	 */
+	task = rcu_dereference(w->task);
+	if (task)
+		wake_up_process(task);
+	rcu_read_unlock();
+}
+
struct task_struct *try_get_task_struct(struct task_struct **ptask)
{
struct task_struct *task;
* Turn us into a lazy TLB process if we
* aren't already..
*/
-static void exit_mm(struct task_struct *tsk)
+static void exit_mm(void)
{
- struct mm_struct *mm = tsk->mm;
+ struct mm_struct *mm = current->mm;
struct core_state *core_state;
- mm_release(tsk, mm);
+ mm_release(current, mm);
if (!mm)
return;
sync_mm_rss(mm);
up_read(&mm->mmap_sem);
- self.task = tsk;
+ self.task = current;
self.next = xchg(&core_state->dumper.next, &self);
/*
* Implies mb(), the result of xchg() must be visible
complete(&core_state->startup);
for (;;) {
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
if (!self.task) /* see coredump_finish() */
break;
freezable_schedule();
}
- __set_task_state(tsk, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
down_read(&mm->mmap_sem);
}
atomic_inc(&mm->mm_count);
- BUG_ON(mm != tsk->active_mm);
+ BUG_ON(mm != current->active_mm);
/* more a memory barrier than a real lock */
- task_lock(tsk);
- tsk->mm = NULL;
+ task_lock(current);
+ current->mm = NULL;
up_read(&mm->mmap_sem);
enter_lazy_tlb(mm, current);
- task_unlock(tsk);
+ task_unlock(current);
mm_update_next_owner(mm);
mmput(mm);
if (test_thread_flag(TIF_MEMDIE))
return thread;
if (father->signal->has_child_subreaper) {
+ unsigned int ns_level = task_pid(father)->level;
/*
* Find the first ->is_child_subreaper ancestor in our pid_ns.
- * We start from father to ensure we can not look into another
- * namespace, this is safe because all its threads are dead.
+ * We can't check reaper != child_reaper to ensure we do not
+ * cross the namespaces, the exiting parent could be injected
+ * by setns() + fork().
+ * We check pid->level, this is slightly more efficient than
+ * task_active_pid_ns(reaper) != task_active_pid_ns(father).
*/
- for (reaper = father;
- !same_thread_group(reaper, child_reaper);
+ for (reaper = father->real_parent;
+ task_pid(reaper)->level == ns_level;
reaper = reaper->real_parent) {
- /* call_usermodehelper() descendants need this check */
if (reaper == &init_task)
break;
if (!reaper->signal->is_child_subreaper)
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
- exit_mm(tsk);
+ exit_mm();
if (group_dead)
acct_process();
struct signal_struct *sig = p->signal;
struct signal_struct *psig = current->signal;
unsigned long maxrss;
- cputime_t tgutime, tgstime;
+ u64 tgutime, tgstime;
/*
* The resource counters for the group leader are in its
* Returns nonzero for a final return, when we have unlocked tasklist_lock.
* Returns zero if the search for a child should continue;
* then ->notask_error is 0 if @p is an eligible child,
- * or another error from security_task_wait(), or still -ECHILD.
+ * or still -ECHILD.
*/
static int wait_consider_task(struct wait_opts *wo, int ptrace,
struct task_struct *p)
if (!ret)
return ret;
- ret = security_task_wait(p);
- if (unlikely(ret < 0)) {
- /*
- * If we have not yet seen any eligible child,
- * then let this error code replace -ECHILD.
- * A permission error will give the user a clue
- * to look for security policy problems, rather
- * than for mysterious wait bugs.
- */
- if (wo->notask_error)
- wo->notask_error = ret;
- return 0;
- }
-
if (unlikely(exit_state == EXIT_TRACE)) {
/*
* ptrace == 0 means we are the natural parent. In this case
* Returns nonzero for a final return, when we have unlocked tasklist_lock.
* Returns zero if the search for a child should continue; then
* ->notask_error is 0 if there were any eligible children,
- * or another error from security_task_wait(), or still -ECHILD.
+ * or still -ECHILD.
*/
static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk)
{
#include <linux/rmap.h>
#include <linux/ksm.h>
#include <linux/acct.h>
+#include <linux/userfaultfd_k.h>
#include <linux/tsacct_kern.h>
#include <linux/cn_proc.h>
#include <linux/freezer.h>
int i;
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
-#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
+#define ARCH_MIN_TASKALIGN 0
#endif
+ int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
+
/* create a slab on which task_structs can be allocated */
task_struct_cachep = kmem_cache_create("task_struct",
- arch_task_struct_size, ARCH_MIN_TASKALIGN,
+ arch_task_struct_size, align,
SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
#endif
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
+ LIST_HEAD(uf);
uprobe_start_dup_mmap();
if (down_write_killable(&oldmm->mmap_sem)) {
if (retval)
goto fail_nomem_policy;
tmp->vm_mm = mm;
+ retval = dup_userfaultfd(tmp, &uf);
+ if (retval)
+ goto fail_nomem_anon_vma_fork;
if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
- tmp->vm_flags &=
- ~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP);
+ tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
tmp->vm_next = tmp->vm_prev = NULL;
- tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
file = tmp->vm_file;
if (file) {
struct inode *inode = file_inode(file);
up_write(&mm->mmap_sem);
flush_tlb_mm(oldmm);
up_write(&oldmm->mmap_sem);
+ dup_userfaultfd_complete(&uf);
fail_uprobe_end:
uprobe_end_dup_mmap();
return retval;
}
}
+#ifdef CONFIG_POSIX_TIMERS
/*
* Initialize POSIX timer handling for a thread group.
*/
cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
if (cpu_limit != RLIM_INFINITY) {
- sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
+ sig->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC;
sig->cputimer.running = true;
}
INIT_LIST_HEAD(&sig->cpu_timers[1]);
INIT_LIST_HEAD(&sig->cpu_timers[2]);
}
+#else
+static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { }
+#endif
static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
{
init_waitqueue_head(&sig->wait_chldexit);
sig->curr_target = tsk;
init_sigpending(&sig->shared_pending);
- INIT_LIST_HEAD(&sig->posix_timers);
seqlock_init(&sig->stats_lock);
prev_cputime_init(&sig->prev_cputime);
#ifdef CONFIG_POSIX_TIMERS
+ INIT_LIST_HEAD(&sig->posix_timers);
hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
sig->real_timer.function = it_real_fn;
#endif
sig->oom_score_adj = current->signal->oom_score_adj;
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
- sig->has_child_subreaper = current->signal->has_child_subreaper ||
- current->signal->is_child_subreaper;
-
mutex_init(&sig->cred_guard_mutex);
return 0;
#endif
}
+#ifdef CONFIG_POSIX_TIMERS
/*
* Initialize POSIX timer handling for a single task.
*/
INIT_LIST_HEAD(&tsk->cpu_timers[1]);
INIT_LIST_HEAD(&tsk->cpu_timers[2]);
}
+#else
+static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
+#endif
static inline void
init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
p->signal->leader_pid = pid;
p->signal->tty = tty_kref_get(current->signal->tty);
+ /*
+ * Inherit has_child_subreaper flag under the same
+ * tasklist_lock with adding child to the process tree
+ * for propagate_has_child_subreaper optimization.
+ */
+ p->signal->has_child_subreaper = p->real_parent->signal->has_child_subreaper ||
+ p->real_parent->signal->is_child_subreaper;
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
attach_pid(p, PIDTYPE_PGID);
}
#endif
+ /*
+  * walk_process_tree - call @visitor on the descendants of @top
+  * @top: task whose thread group is the root of the walk
+  * @visitor: callback invoked as visitor(child, data) for each child reached
+  * @data: opaque pointer handed to @visitor unchanged
+  *
+  * The whole walk runs with tasklist_lock held for reading.  The return
+  * value of @visitor steers the traversal:
+  *   < 0  stop the walk immediately;
+  *   > 0  descend into the children of that child's thread group;
+  *   == 0 do not descend, continue with the next sibling.
+  */
+ void walk_process_tree(struct task_struct *top, proc_visitor visitor, void *data)
+ {
+ struct task_struct *leader, *parent, *child;
+ int res;
+
+ read_lock(&tasklist_lock);
+ /* Normalise @top to its group leader; it marks the root of the walk. */
+ leader = top = top->group_leader;
+ down:
+ /* Visit the children of every thread in @leader's thread group. */
+ for_each_thread(leader, parent) {
+ list_for_each_entry(child, &parent->children, sibling) {
+ res = visitor(child, data);
+ if (res) {
+ if (res < 0)
+ goto out;
+ /* Non-zero, non-negative: restart the loops one level down. */
+ leader = child;
+ goto down;
+ }
+ /*
+  * Re-entry point when coming back up: child and parent have been
+  * restored, so both loops carry on with the next sibling.
+  */
+ up:
+ ;
+ }
+ }
+
+ /*
+  * This thread group is exhausted.  Unless we are back at the root,
+  * climb to the parent level and resume its iteration after the child
+  * we had descended into.
+  */
+ if (leader != top) {
+ child = leader;
+ parent = child->real_parent;
+ leader = parent->group_leader;
+ goto up;
+ }
+ out:
+ read_unlock(&tasklist_lock);
+ }
+
#ifndef ARCH_MIN_MMSTRUCT_ALIGN
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
+/*
+ * Fill @tms for the current task: tms_utime/tms_stime come from
+ * thread_group_cputime_adjusted(), tms_cutime/tms_cstime from the
+ * cutime/cstime fields of current->signal.  Each value is converted with
+ * nsec_to_clock_t() before being stored.
+ */
void do_sys_times(struct tms *tms)
{
- cputime_t tgutime, tgstime, cutime, cstime;
+ u64 tgutime, tgstime, cutime, cstime;
thread_group_cputime_adjusted(current, &tgutime, &tgstime);
cutime = current->signal->cutime;
cstime = current->signal->cstime;
- tms->tms_utime = cputime_to_clock_t(tgutime);
- tms->tms_stime = cputime_to_clock_t(tgstime);
- tms->tms_cutime = cputime_to_clock_t(cutime);
- tms->tms_cstime = cputime_to_clock_t(cstime);
+ tms->tms_utime = nsec_to_clock_t(tgutime);
+ tms->tms_stime = nsec_to_clock_t(tgstime);
+ tms->tms_cutime = nsec_to_clock_t(cutime);
+ tms->tms_cstime = nsec_to_clock_t(cstime);
}
SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
struct task_struct *t;
unsigned long flags;
- cputime_t tgutime, tgstime, utime, stime;
+ u64 tgutime, tgstime, utime, stime;
unsigned long maxrss = 0;
memset((char *)r, 0, sizeof (*r));
unlock_task_sighand(p, &flags);
out:
- cputime_to_timeval(utime, &r->ru_utime);
- cputime_to_timeval(stime, &r->ru_stime);
+ r->ru_utime = ns_to_timeval(utime);
+ r->ru_stime = ns_to_timeval(stime);
if (who != RUSAGE_CHILDREN) {
struct mm_struct *mm = get_task_mm(p);
}
#endif
+ static int propagate_has_child_subreaper(struct task_struct *p, void *data)
+ {
+ /*
+ * Visitor passed to walk_process_tree(); @data is unused.
+ *
+ * If the task already has has_child_subreaper set, all its descendants
+ * already have this flag too and new descendants will
+ * inherit it on fork, so skip them.
+ *
+ * If we've found a child_reaper, skip the descendants in
+ * its subtree as they will never get out of the pidns.
+ */
+ if (p->signal->has_child_subreaper ||
+ is_child_reaper(task_pid(p)))
+ return 0; /* 0: the walk does not descend below p */
+
+ p->signal->has_child_subreaper = 1;
+ return 1; /* positive: the walk continues into p's children */
+ }
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
break;
case PR_SET_CHILD_SUBREAPER:
me->signal->is_child_subreaper = !!arg2;
+ if (!arg2)
+ break;
+
+ walk_process_tree(me, propagate_has_child_subreaper, NULL);
break;
case PR_GET_CHILD_SUBREAPER:
error = put_user(me->signal->is_child_subreaper,
static int zero = 0;
static int int_max = INT_MAX;
- #define UCOUNT_ENTRY(name) \
+ #define UCOUNT_ENTRY(name) \
{ \
.procname = name, \
.maxlen = sizeof(int), \
UCOUNT_ENTRY("max_net_namespaces"),
UCOUNT_ENTRY("max_mnt_namespaces"),
UCOUNT_ENTRY("max_cgroup_namespaces"),
+ #ifdef CONFIG_INOTIFY_USER
+ UCOUNT_ENTRY("max_inotify_instances"),
+ UCOUNT_ENTRY("max_inotify_watches"),
+ #endif
{ }
};
#endif /* CONFIG_SYSCTL */
* properly.
*/
user_header = register_sysctl("user", empty);
+ kmemleak_ignore(user_header);
BUG_ON(!user_header);
BUG_ON(!setup_userns_sysctls(&init_user_ns));
#endif
return 0;
}
subsys_initcall(user_namespace_sysctl_init);
-
-
#include "include/match.h"
#include "include/path.h"
#include "include/policy.h"
+#include "include/policy_ns.h"
/**
* aa_free_domain_entries - free entries in a domain table
* Returns: permission set
*/
static struct file_perms change_profile_perms(struct aa_profile *profile,
- struct aa_namespace *ns,
+ struct aa_ns *ns,
const char *name, u32 request,
unsigned int start)
{
*
* Returns: profile or NULL if no match found
*/
-static struct aa_profile *find_attach(struct aa_namespace *ns,
+static struct aa_profile *find_attach(struct aa_ns *ns,
struct list_head *list, const char *name)
{
struct aa_profile *profile;
static struct aa_profile *x_table_lookup(struct aa_profile *profile, u32 xindex)
{
struct aa_profile *new_profile = NULL;
- struct aa_namespace *ns = profile->ns;
+ struct aa_ns *ns = profile->ns;
u32 xtype = xindex & AA_X_TYPE_MASK;
int index = xindex & AA_X_INDEX_MASK;
const char *name;
/* index is guaranteed to be in range, validated at load time */
for (name = profile->file.trans.table[index]; !new_profile && name;
name = next_name(xtype, name)) {
- struct aa_namespace *new_ns;
+ struct aa_ns *new_ns;
const char *xname = NULL;
new_ns = NULL;
;
}
/* released below */
- new_ns = aa_find_namespace(ns, ns_name);
+ new_ns = aa_find_ns(ns, ns_name);
if (!new_ns)
continue;
} else if (*name == '@') {
/* released by caller */
new_profile = aa_lookup_profile(new_ns ? new_ns : ns, xname);
- aa_put_namespace(new_ns);
+ aa_put_ns(new_ns);
}
/* released by caller */
const char *name, u32 xindex)
{
struct aa_profile *new_profile = NULL;
- struct aa_namespace *ns = profile->ns;
+ struct aa_ns *ns = profile->ns;
u32 xtype = xindex & AA_X_TYPE_MASK;
switch (xtype) {
*/
int apparmor_bprm_set_creds(struct linux_binprm *bprm)
{
- struct aa_task_cxt *cxt;
+ struct aa_task_ctx *ctx;
struct aa_profile *profile, *new_profile = NULL;
- struct aa_namespace *ns;
+ struct aa_ns *ns;
char *buffer = NULL;
unsigned int state;
struct file_perms perms = {};
if (bprm->cred_prepared)
return 0;
- cxt = cred_cxt(bprm->cred);
- BUG_ON(!cxt);
+ ctx = cred_ctx(bprm->cred);
+ AA_BUG(!ctx);
- profile = aa_get_newest_profile(cxt->profile);
+ profile = aa_get_newest_profile(ctx->profile);
/*
* get the namespace from the replacement profile as replacement
* can change the namespace
*/
if (unconfined(profile)) {
/* unconfined task */
- if (cxt->onexec)
+ if (ctx->onexec)
/* change_profile on exec already been granted */
- new_profile = aa_get_profile(cxt->onexec);
+ new_profile = aa_get_profile(ctx->onexec);
else
new_profile = find_attach(ns, &ns->base.profiles, name);
if (!new_profile)
/* find exec permissions for name */
state = aa_str_perms(profile->file.dfa, state, name, &cond, &perms);
- if (cxt->onexec) {
+ if (ctx->onexec) {
struct file_perms cp;
info = "change_profile onexec";
- new_profile = aa_get_newest_profile(cxt->onexec);
+ new_profile = aa_get_newest_profile(ctx->onexec);
if (!(perms.allow & AA_MAY_ONEXEC))
goto audit;
* exec\0change_profile
*/
state = aa_dfa_null_transition(profile->file.dfa, state);
- cp = change_profile_perms(profile, cxt->onexec->ns,
- cxt->onexec->base.name,
+ cp = change_profile_perms(profile, ctx->onexec->ns,
+ ctx->onexec->base.name,
AA_MAY_ONEXEC, state);
if (!(cp.allow & AA_MAY_ONEXEC))
}
} else if (COMPLAIN_MODE(profile)) {
/* no exec permission - are we in learning mode */
- new_profile = aa_new_null_profile(profile, 0);
+ new_profile = aa_new_null_profile(profile, false, name,
+ GFP_ATOMIC);
if (!new_profile) {
error = -ENOMEM;
info = "could not create null profile";
;
}
- if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
+ if (bprm->unsafe & LSM_UNSAFE_PTRACE) {
error = may_change_ptraced_domain(new_profile);
if (error)
goto audit;
bprm->per_clear |= PER_CLEAR_ON_SETID;
x_clear:
- aa_put_profile(cxt->profile);
- /* transfer new profile reference will be released when cxt is freed */
- cxt->profile = new_profile;
+ aa_put_profile(ctx->profile);
+ /* transfer the new profile reference; it will be released when ctx is freed */
+ ctx->profile = new_profile;
new_profile = NULL;
/* clear out all temporary/transitional state from the context */
- aa_clear_task_cxt_trans(cxt);
+ aa_clear_task_ctx_trans(ctx);
audit:
- error = aa_audit_file(profile, &perms, GFP_KERNEL, OP_EXEC, MAY_EXEC,
- name,
+ error = aa_audit_file(profile, &perms, OP_EXEC, MAY_EXEC, name,
new_profile ? new_profile->base.hname : NULL,
cond.uid, info, error);
/*
 * Called while the new credentials in @bprm are being committed.
 * Does nothing when the profile in bprm->cred's task context is the same
 * as the current profile or is unconfined. Otherwise it clears
 * current->pdeath_signal and hands the old and new profiles to
 * __aa_transition_rlimits().
 */
void apparmor_bprm_committing_creds(struct linux_binprm *bprm)
{
struct aa_profile *profile = __aa_current_profile();
- struct aa_task_cxt *new_cxt = cred_cxt(bprm->cred);
+ struct aa_task_ctx *new_ctx = cred_ctx(bprm->cred);
/* bail out if unconfined or not changing profile */
- if ((new_cxt->profile == profile) ||
- (unconfined(new_cxt->profile)))
+ if ((new_ctx->profile == profile) ||
+ (unconfined(new_ctx->profile)))
return;
current->pdeath_signal = 0;
/* reset soft limits and set hard limits for the new profile */
- __aa_transition_rlimits(profile, new_cxt->profile);
+ __aa_transition_rlimits(profile, new_ctx->profile);
}
/**
int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
{
const struct cred *cred;
- struct aa_task_cxt *cxt;
+ struct aa_task_ctx *ctx;
struct aa_profile *profile, *previous_profile, *hat = NULL;
char *name = NULL;
int i;
/* released below */
cred = get_current_cred();
- cxt = cred_cxt(cred);
+ ctx = cred_ctx(cred);
profile = aa_get_newest_profile(aa_cred_profile(cred));
- previous_profile = aa_get_newest_profile(cxt->previous);
+ previous_profile = aa_get_newest_profile(ctx->previous);
if (unconfined(profile)) {
info = "unconfined";
aa_put_profile(root);
target = name;
/* released below */
- hat = aa_new_null_profile(profile, 1);
+ hat = aa_new_null_profile(profile, true, hats[0],
+ GFP_KERNEL);
if (!hat) {
info = "failed null profile create";
error = -ENOMEM;
audit:
if (!permtest)
- error = aa_audit_file(profile, &perms, GFP_KERNEL,
- OP_CHANGE_HAT, AA_MAY_CHANGEHAT, NULL,
- target, GLOBAL_ROOT_UID, info, error);
+ error = aa_audit_file(profile, &perms, OP_CHANGE_HAT,
+ AA_MAY_CHANGEHAT, NULL, target,
+ GLOBAL_ROOT_UID, info, error);
out:
aa_put_profile(hat);
/**
* aa_change_profile - perform a one-way profile transition
- * @ns_name: name of the profile namespace to change to (MAYBE NULL)
- * @hname: name of profile to change to (MAYBE NULL)
+ * @fqname: name of profile may include namespace (NOT NULL)
* @onexec: whether this transition is to take place immediately or at exec
* @permtest: true if this is just a permission test
*
*
* Returns %0 on success, error otherwise.
*/
-int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
- bool permtest)
+int aa_change_profile(const char *fqname, bool onexec,
+ bool permtest, bool stack)
{
const struct cred *cred;
struct aa_profile *profile, *target = NULL;
- struct aa_namespace *ns = NULL;
struct file_perms perms = {};
- const char *name = NULL, *info = NULL;
- int op, error = 0;
+ const char *info = NULL, *op;
+ int error = 0;
u32 request;
- if (!hname && !ns_name)
+ if (!fqname || !*fqname) {
+ AA_DEBUG("no profile name");
return -EINVAL;
+ }
if (onexec) {
request = AA_MAY_ONEXEC;
return -EPERM;
}
- if (ns_name) {
- /* released below */
- ns = aa_find_namespace(profile->ns, ns_name);
- if (!ns) {
- /* we don't create new namespace in complain mode */
- name = ns_name;
- info = "namespace not found";
- error = -ENOENT;
- goto audit;
- }
- } else
- /* released below */
- ns = aa_get_namespace(profile->ns);
-
- /* if the name was not specified, use the name of the current profile */
- if (!hname) {
- if (unconfined(profile))
- hname = ns->unconfined->base.hname;
- else
- hname = profile->base.hname;
- }
-
- perms = change_profile_perms(profile, ns, hname, request,
- profile->file.start);
- if (!(perms.allow & request)) {
- error = -EACCES;
- goto audit;
- }
-
- /* released below */
- target = aa_lookup_profile(ns, hname);
+ target = aa_fqlookupn_profile(profile, fqname, strlen(fqname));
if (!target) {
info = "profile not found";
error = -ENOENT;
if (permtest || !COMPLAIN_MODE(profile))
goto audit;
/* released below */
- target = aa_new_null_profile(profile, 0);
+ target = aa_new_null_profile(profile, false, fqname,
+ GFP_KERNEL);
if (!target) {
info = "failed null profile create";
error = -ENOMEM;
}
}
+ perms = change_profile_perms(profile, target->ns, target->base.hname,
+ request, profile->file.start);
+ if (!(perms.allow & request)) {
+ error = -EACCES;
+ goto audit;
+ }
+
/* check if tracing task is allowed to trace target domain */
error = may_change_ptraced_domain(target);
if (error) {
audit:
if (!permtest)
- error = aa_audit_file(profile, &perms, GFP_KERNEL, op, request,
- name, hname, GLOBAL_ROOT_UID, info, error);
+ error = aa_audit_file(profile, &perms, op, request, NULL,
+ fqname, GLOBAL_ROOT_UID, info, error);
- aa_put_namespace(ns);
aa_put_profile(target);
put_cred(cred);
if ((is_setid ||
!cap_issubset(new->cap_permitted, old->cap_permitted)) &&
- bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
+ ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
+ !ptracer_capable(current, new->user_ns))) {
/* downgrade; they get no more than they had, and maybe less */
- if (!capable(CAP_SETUID) ||
+ if (!ns_capable(new->user_ns, CAP_SETUID) ||
(bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
new->euid = new->uid;
new->egid = new->gid;
/*
 * Init-time helper: passes the capability_hooks table, its element count
 * and the name "capability" to security_add_hooks().
 */
void __init capability_add_hooks(void)
{
- security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks));
+ security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
+ "capability");
}
#endif /* CONFIG_SECURITY */
return sid;
}
-/*
- * get the subjective security ID of the current task
- */
-static inline u32 current_sid(void)
-{
- const struct task_security_struct *tsec = current_security();
-
- return tsec->sid;
-}
-
/* Allocate and free functions for each kind of security blob. */
static int inode_alloc_security(struct inode *inode)
sbsec->behavior == SECURITY_FS_USE_NATIVE ||
/* Special handling. Genfs but also in-core setxattr handler */
!strcmp(sb->s_type->name, "sysfs") ||
+ !strcmp(sb->s_type->name, "cgroup") ||
+ !strcmp(sb->s_type->name, "cgroup2") ||
!strcmp(sb->s_type->name, "pstore") ||
!strcmp(sb->s_type->name, "debugfs") ||
+ !strcmp(sb->s_type->name, "tracefs") ||
!strcmp(sb->s_type->name, "rootfs");
}
}
/*
- * If this is a user namespace mount, no contexts are allowed
- * on the command line and security labels must be ignored.
+ * If this is a user namespace mount and the filesystem type is not
+ * explicitly whitelisted, then no contexts are allowed on the command
+ * line and security labels must be ignored.
*/
- if (sb->s_user_ns != &init_user_ns) {
+ if (sb->s_user_ns != &init_user_ns &&
+ strcmp(sb->s_type->name, "tmpfs") &&
+ strcmp(sb->s_type->name, "ramfs") &&
+ strcmp(sb->s_type->name, "devpts")) {
if (context_sid || fscontext_sid || rootcontext_sid ||
defcontext_sid) {
rc = -EACCES;
static inline u16 socket_type_to_security_class(int family, int type, int protocol)
{
+ int extsockclass = selinux_policycap_extsockclass;
+
switch (family) {
case PF_UNIX:
switch (type) {
case PF_INET6:
switch (type) {
case SOCK_STREAM:
+ case SOCK_SEQPACKET:
if (default_protocol_stream(protocol))
return SECCLASS_TCP_SOCKET;
+ else if (extsockclass && protocol == IPPROTO_SCTP)
+ return SECCLASS_SCTP_SOCKET;
else
return SECCLASS_RAWIP_SOCKET;
case SOCK_DGRAM:
if (default_protocol_dgram(protocol))
return SECCLASS_UDP_SOCKET;
+ else if (extsockclass && (protocol == IPPROTO_ICMP ||
+ protocol == IPPROTO_ICMPV6))
+ return SECCLASS_ICMP_SOCKET;
else
return SECCLASS_RAWIP_SOCKET;
case SOCK_DCCP:
return SECCLASS_APPLETALK_SOCKET;
}
+ if (extsockclass) {
+ switch (family) {
+ case PF_AX25:
+ return SECCLASS_AX25_SOCKET;
+ case PF_IPX:
+ return SECCLASS_IPX_SOCKET;
+ case PF_NETROM:
+ return SECCLASS_NETROM_SOCKET;
+ case PF_ATMPVC:
+ return SECCLASS_ATMPVC_SOCKET;
+ case PF_X25:
+ return SECCLASS_X25_SOCKET;
+ case PF_ROSE:
+ return SECCLASS_ROSE_SOCKET;
+ case PF_DECnet:
+ return SECCLASS_DECNET_SOCKET;
+ case PF_ATMSVC:
+ return SECCLASS_ATMSVC_SOCKET;
+ case PF_RDS:
+ return SECCLASS_RDS_SOCKET;
+ case PF_IRDA:
+ return SECCLASS_IRDA_SOCKET;
+ case PF_PPPOX:
+ return SECCLASS_PPPOX_SOCKET;
+ case PF_LLC:
+ return SECCLASS_LLC_SOCKET;
+ case PF_CAN:
+ return SECCLASS_CAN_SOCKET;
+ case PF_TIPC:
+ return SECCLASS_TIPC_SOCKET;
+ case PF_BLUETOOTH:
+ return SECCLASS_BLUETOOTH_SOCKET;
+ case PF_IUCV:
+ return SECCLASS_IUCV_SOCKET;
+ case PF_RXRPC:
+ return SECCLASS_RXRPC_SOCKET;
+ case PF_ISDN:
+ return SECCLASS_ISDN_SOCKET;
+ case PF_PHONET:
+ return SECCLASS_PHONET_SOCKET;
+ case PF_IEEE802154:
+ return SECCLASS_IEEE802154_SOCKET;
+ case PF_CAIF:
+ return SECCLASS_CAIF_SOCKET;
+ case PF_ALG:
+ return SECCLASS_ALG_SOCKET;
+ case PF_NFC:
+ return SECCLASS_NFC_SOCKET;
+ case PF_VSOCK:
+ return SECCLASS_VSOCK_SOCKET;
+ case PF_KCM:
+ return SECCLASS_KCM_SOCKET;
+ case PF_QIPCRTR:
+ return SECCLASS_QIPCRTR_SOCKET;
+ case PF_SMC:
+ return SECCLASS_SMC_SOCKET;
+#if PF_MAX > 44
+#error New address family defined, please update this function.
+#endif
+ }
+ }
+
return SECCLASS_SOCKET;
}
return perm;
}
-/*
- * Check permission between a pair of credentials
- * fork check, ptrace check, etc.
- */
-static int cred_has_perm(const struct cred *actor,
- const struct cred *target,
- u32 perms)
-{
- u32 asid = cred_sid(actor), tsid = cred_sid(target);
-
- return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL);
-}
-
-/*
- * Check permission between a pair of tasks, e.g. signal checks,
- * fork check, ptrace check, etc.
- * tsk1 is the actor and tsk2 is the target
- * - this uses the default subjective creds of tsk1
- */
-static int task_has_perm(const struct task_struct *tsk1,
- const struct task_struct *tsk2,
- u32 perms)
-{
- const struct task_security_struct *__tsec1, *__tsec2;
- u32 sid1, sid2;
-
- rcu_read_lock();
- __tsec1 = __task_cred(tsk1)->security; sid1 = __tsec1->sid;
- __tsec2 = __task_cred(tsk2)->security; sid2 = __tsec2->sid;
- rcu_read_unlock();
- return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL);
-}
-
-/*
- * Check permission between current and another task, e.g. signal checks,
- * fork check, ptrace check, etc.
- * current is the actor and tsk2 is the target
- * - this uses current's subjective creds
- */
-static int current_has_perm(const struct task_struct *tsk,
- u32 perms)
-{
- u32 sid, tsid;
-
- sid = current_sid();
- tsid = task_sid(tsk);
- return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL);
-}
-
#if CAP_LAST_CAP > 63
#error Fix SELinux to handle capabilities > 63.
#endif
return rc;
}
-/* Check whether a task is allowed to use a system operation. */
-static int task_has_system(struct task_struct *tsk,
- u32 perms)
-{
- u32 sid = task_sid(tsk);
-
- return avc_has_perm(sid, SECINITSID_KERNEL,
- SECCLASS_SYSTEM, perms, NULL);
-}
-
/* Check whether a task has a particular permission to an inode.
The 'adp' parameter is optional and allows other audit
data to be passed (e.g. the dentry). */
FILESYSTEM__ASSOCIATE, &ad);
}
-/* Check whether a task can create a key. */
-static int may_create_key(u32 ksid,
- struct task_struct *ctx)
-{
- u32 sid = task_sid(ctx);
-
- return avc_has_perm(sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL);
-}
-
#define MAY_LINK 0
#define MAY_UNLINK 1
#define MAY_RMDIR 2
/*
 * Check whether current may access @child via ptrace.
 * Source SID is current_sid(), target SID is task_sid(child).
 * When @mode contains PTRACE_MODE_READ the check is FILE__READ in
 * SECCLASS_FILE; otherwise it is PROCESS__PTRACE in SECCLASS_PROCESS.
 * Returns the avc_has_perm() result.
 */
static int selinux_ptrace_access_check(struct task_struct *child,
unsigned int mode)
{
- if (mode & PTRACE_MODE_READ) {
- u32 sid = current_sid();
- u32 csid = task_sid(child);
+ u32 sid = current_sid();
+ u32 csid = task_sid(child);
+
+ if (mode & PTRACE_MODE_READ)
return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL);
- }
- return current_has_perm(child, PROCESS__PTRACE);
+ return avc_has_perm(sid, csid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL);
}
/*
 * Check PROCESS__PTRACE with @parent as the source (task_sid(parent)) and
 * the calling task as the target (current_sid()).
 */
static int selinux_ptrace_traceme(struct task_struct *parent)
{
- return task_has_perm(parent, current, PROCESS__PTRACE);
+ return avc_has_perm(task_sid(parent), current_sid(), SECCLASS_PROCESS,
+ PROCESS__PTRACE, NULL);
}
/*
 * Check PROCESS__GETCAP from current_sid() against task_sid(target).
 * The capability-set pointer arguments are not used by this hook.
 */
static int selinux_capget(struct task_struct *target, kernel_cap_t *effective,
kernel_cap_t *inheritable, kernel_cap_t *permitted)
{
- return current_has_perm(target, PROCESS__GETCAP);
+ return avc_has_perm(current_sid(), task_sid(target), SECCLASS_PROCESS,
+ PROCESS__GETCAP, NULL);
}
/*
 * Check PROCESS__SETCAP with cred_sid(old) as source and cred_sid(new)
 * as target. The capability-set arguments visible here are not used.
 */
static int selinux_capset(struct cred *new, const struct cred *old,
const kernel_cap_t *inheritable,
const kernel_cap_t *permitted)
{
- return cred_has_perm(old, new, PROCESS__SETCAP);
+ return avc_has_perm(cred_sid(old), cred_sid(new), SECCLASS_PROCESS,
+ PROCESS__SETCAP, NULL);
}
/*
/*
 * Map a syslog action @type to a SECCLASS_SYSTEM permission and check it
 * from current_sid() against SECINITSID_KERNEL:
 *  - READ_ALL, SIZE_BUFFER                      -> SYSTEM__SYSLOG_READ
 *  - CONSOLE_OFF, CONSOLE_ON, CONSOLE_LEVEL     -> SYSTEM__SYSLOG_CONSOLE
 *  - every other type                           -> SYSTEM__SYSLOG_MOD
 * Returns the avc_has_perm() result.
 */
static int selinux_syslog(int type)
{
- int rc;
-
switch (type) {
case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */
case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */
- rc = task_has_system(current, SYSTEM__SYSLOG_READ);
- break;
+ return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, NULL);
case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */
case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */
/* Set level of messages printed to console */
case SYSLOG_ACTION_CONSOLE_LEVEL:
- rc = task_has_system(current, SYSTEM__SYSLOG_CONSOLE);
- break;
- case SYSLOG_ACTION_CLOSE: /* Close log */
- case SYSLOG_ACTION_OPEN: /* Open log */
- case SYSLOG_ACTION_READ: /* Read from log */
- case SYSLOG_ACTION_READ_CLEAR: /* Read/clear last kernel messages */
- case SYSLOG_ACTION_CLEAR: /* Clear ring buffer */
- default:
- rc = task_has_system(current, SYSTEM__SYSLOG_MOD);
- break;
+ return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE,
+ NULL);
}
- return rc;
+ /* All other syslog types */
+ return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, NULL);
}
/*
/* binprm security operations */
-static u32 ptrace_parent_sid(struct task_struct *task)
+static u32 ptrace_parent_sid(void)
{
u32 sid = 0;
struct task_struct *tracer;
rcu_read_lock();
- tracer = ptrace_parent(task);
+ tracer = ptrace_parent(current);
if (tracer)
sid = task_sid(tracer);
rcu_read_unlock();
/* Make sure that anyone attempting to ptrace over a task that
* changes its SID has the appropriate permit */
- if (bprm->unsafe &
- (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
+ if (bprm->unsafe & LSM_UNSAFE_PTRACE) {
- u32 ptsid = ptrace_parent_sid(current);
+ u32 ptsid = ptrace_parent_sid();
if (ptsid != 0) {
rc = avc_has_perm(ptsid, new_tsec->sid,
SECCLASS_PROCESS,
static int file_map_prot_check(struct file *file, unsigned long prot, int shared)
{
const struct cred *cred = current_cred();
+ u32 sid = cred_sid(cred);
int rc = 0;
if (default_noexec &&
* private file mapping that will also be writable.
* This has an additional check.
*/
- rc = cred_has_perm(cred, cred, PROCESS__EXECMEM);
+ rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+ PROCESS__EXECMEM, NULL);
if (rc)
goto error;
}
unsigned long prot)
{
const struct cred *cred = current_cred();
+ u32 sid = cred_sid(cred);
if (selinux_checkreqprot)
prot = reqprot;
int rc = 0;
if (vma->vm_start >= vma->vm_mm->start_brk &&
vma->vm_end <= vma->vm_mm->brk) {
- rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP);
+ rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+ PROCESS__EXECHEAP, NULL);
} else if (!vma->vm_file &&
((vma->vm_start <= vma->vm_mm->start_stack &&
vma->vm_end >= vma->vm_mm->start_stack) ||
vma_is_stack_for_current(vma))) {
- rc = current_has_perm(current, PROCESS__EXECSTACK);
+ rc = avc_has_perm(sid, sid, SECCLASS_PROCESS,
+ PROCESS__EXECSTACK, NULL);
} else if (vma->vm_file && vma->anon_vma) {
/*
* We are making executable a file mapping that has
/*
 * Check PROCESS__FORK with the calling task's SID as both source and
 * target. @clone_flags is not consulted.
 */
static int selinux_task_create(unsigned long clone_flags)
{
- return current_has_perm(current, PROCESS__FORK);
+ u32 sid = current_sid();
+
+ return avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL);
}
/*
/*
 * Check SYSTEM__MODULE_REQUEST (SECCLASS_SYSTEM) from current_sid()
 * against SECINITSID_KERNEL. @kmod_name is attached to the audit data
 * (type LSM_AUDIT_DATA_KMOD) passed to avc_has_perm().
 */
static int selinux_kernel_module_request(char *kmod_name)
{
- u32 sid;
struct common_audit_data ad;
- sid = task_sid(current);
-
ad.type = LSM_AUDIT_DATA_KMOD;
ad.u.kmod_name = kmod_name;
- return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM,
+ return avc_has_perm(current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM,
SYSTEM__MODULE_REQUEST, &ad);
}
/*
 * Check PROCESS__SETPGID from current_sid() against task_sid(p).
 * @pgid is not consulted.
 */
static int selinux_task_setpgid(struct task_struct *p, pid_t pgid)
{
- return current_has_perm(p, PROCESS__SETPGID);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__SETPGID, NULL);
}
/* Check PROCESS__GETPGID from current_sid() against task_sid(p). */
static int selinux_task_getpgid(struct task_struct *p)
{
- return current_has_perm(p, PROCESS__GETPGID);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__GETPGID, NULL);
}
/* Check PROCESS__GETSESSION from current_sid() against task_sid(p). */
static int selinux_task_getsid(struct task_struct *p)
{
- return current_has_perm(p, PROCESS__GETSESSION);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__GETSESSION, NULL);
}
static void selinux_task_getsecid(struct task_struct *p, u32 *secid)
/*
 * Check PROCESS__SETSCHED from current_sid() against task_sid(p).
 * @nice is not consulted.
 */
static int selinux_task_setnice(struct task_struct *p, int nice)
{
- return current_has_perm(p, PROCESS__SETSCHED);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__SETSCHED, NULL);
}
/*
 * Check PROCESS__SETSCHED from current_sid() against task_sid(p).
 * @ioprio is not consulted.
 */
static int selinux_task_setioprio(struct task_struct *p, int ioprio)
{
- return current_has_perm(p, PROCESS__SETSCHED);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__SETSCHED, NULL);
}
/* Check PROCESS__GETSCHED from current_sid() against task_sid(p). */
static int selinux_task_getioprio(struct task_struct *p)
{
- return current_has_perm(p, PROCESS__GETSCHED);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__GETSCHED, NULL);
}
static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource,
later be used as a safe reset point for the soft limit
upon context transitions. See selinux_bprm_committing_creds. */
if (old_rlim->rlim_max != new_rlim->rlim_max)
- return current_has_perm(p, PROCESS__SETRLIMIT);
+ return avc_has_perm(current_sid(), task_sid(p),
+ SECCLASS_PROCESS, PROCESS__SETRLIMIT, NULL);
return 0;
}
/* Check PROCESS__SETSCHED from current_sid() against task_sid(p). */
static int selinux_task_setscheduler(struct task_struct *p)
{
- return current_has_perm(p, PROCESS__SETSCHED);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__SETSCHED, NULL);
}
/* Check PROCESS__GETSCHED from current_sid() against task_sid(p). */
static int selinux_task_getscheduler(struct task_struct *p)
{
- return current_has_perm(p, PROCESS__GETSCHED);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__GETSCHED, NULL);
}
/*
 * Check PROCESS__SETSCHED from current_sid() against task_sid(p); moving
 * memory uses the same permission as the scheduler-setting hooks.
 */
static int selinux_task_movememory(struct task_struct *p)
{
- return current_has_perm(p, PROCESS__SETSCHED);
+ return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS,
+ PROCESS__SETSCHED, NULL);
}
/*
 * Check whether signal @sig may be delivered to @p.
 * The permission is PROCESS__SIGNULL when @sig is 0, otherwise whatever
 * signal_to_av(sig) returns. The source SID is @secid when it is non-zero
 * and current_sid() otherwise; the target SID is task_sid(p).
 * @info is not consulted. (This hunk also removes selinux_task_wait().)
 */
static int selinux_task_kill(struct task_struct *p, struct siginfo *info,
int sig, u32 secid)
{
u32 perm;
- int rc;
if (!sig)
perm = PROCESS__SIGNULL; /* null signal; existence test */
else
perm = signal_to_av(sig);
- if (secid)
- rc = avc_has_perm(secid, task_sid(p),
- SECCLASS_PROCESS, perm, NULL);
- else
- rc = current_has_perm(p, perm);
- return rc;
-}
-
-static int selinux_task_wait(struct task_struct *p)
-{
- return task_has_perm(p, current, PROCESS__SIGCHLD);
+ if (!secid)
+ secid = current_sid();
+ return avc_has_perm(secid, task_sid(p), SECCLASS_PROCESS, perm, NULL);
}
static void selinux_task_to_inode(struct task_struct *p,
socksid);
}
-static int sock_has_perm(struct task_struct *task, struct sock *sk, u32 perms)
+static int sock_has_perm(struct sock *sk, u32 perms)
{
struct sk_security_struct *sksec = sk->sk_security;
struct common_audit_data ad;
struct lsm_network_audit net = {0,};
- u32 tsid = task_sid(task);
if (sksec->sid == SECINITSID_KERNEL)
return 0;
ad.u.net = &net;
ad.u.net->sk = sk;
- return avc_has_perm(tsid, sksec->sid, sksec->sclass, perms, &ad);
+ return avc_has_perm(current_sid(), sksec->sid, sksec->sclass, perms,
+ &ad);
}
static int selinux_socket_create(int family, int type,
u16 family;
int err;
- err = sock_has_perm(current, sk, SOCKET__BIND);
+ err = sock_has_perm(sk, SOCKET__BIND);
if (err)
goto out;
inet_get_local_port_range(sock_net(sk), &low, &high);
- if (snum < max(PROT_SOCK, low) || snum > high) {
+ if (snum < max(inet_prot_sock(sock_net(sk)), low) ||
+ snum > high) {
err = sel_netport_sid(sk->sk_protocol,
snum, &sid);
if (err)
struct sk_security_struct *sksec = sk->sk_security;
int err;
- err = sock_has_perm(current, sk, SOCKET__CONNECT);
+ err = sock_has_perm(sk, SOCKET__CONNECT);
if (err)
return err;
/*
 * Check SOCKET__LISTEN on sock->sk via sock_has_perm().
 * @backlog is not consulted.
 */
static int selinux_socket_listen(struct socket *sock, int backlog)
{
- return sock_has_perm(current, sock->sk, SOCKET__LISTEN);
+ return sock_has_perm(sock->sk, SOCKET__LISTEN);
}
static int selinux_socket_accept(struct socket *sock, struct socket *newsock)
u16 sclass;
u32 sid;
- err = sock_has_perm(current, sock->sk, SOCKET__ACCEPT);
+ err = sock_has_perm(sock->sk, SOCKET__ACCEPT);
if (err)
return err;
/*
 * Check SOCKET__WRITE on sock->sk via sock_has_perm().
 * @msg and @size are not consulted.
 */
static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg,
int size)
{
- return sock_has_perm(current, sock->sk, SOCKET__WRITE);
+ return sock_has_perm(sock->sk, SOCKET__WRITE);
}
/*
 * Check SOCKET__READ on sock->sk via sock_has_perm().
 * @msg, @size and @flags are not consulted.
 */
static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg,
int size, int flags)
{
- return sock_has_perm(current, sock->sk, SOCKET__READ);
+ return sock_has_perm(sock->sk, SOCKET__READ);
}
/* Check SOCKET__GETATTR on sock->sk via sock_has_perm(). */
static int selinux_socket_getsockname(struct socket *sock)
{
- return sock_has_perm(current, sock->sk, SOCKET__GETATTR);
+ return sock_has_perm(sock->sk, SOCKET__GETATTR);
}
/*
 * Check SOCKET__GETATTR on sock->sk via sock_has_perm(); the same
 * permission is used for getsockname.
 */
static int selinux_socket_getpeername(struct socket *sock)
{
- return sock_has_perm(current, sock->sk, SOCKET__GETATTR);
+ return sock_has_perm(sock->sk, SOCKET__GETATTR);
}
static int selinux_socket_setsockopt(struct socket *sock, int level, int optname)
{
int err;
- err = sock_has_perm(current, sock->sk, SOCKET__SETOPT);
+ err = sock_has_perm(sock->sk, SOCKET__SETOPT);
if (err)
return err;
/*
 * Check SOCKET__GETOPT on sock->sk via sock_has_perm().
 * @level and @optname are not consulted.
 */
static int selinux_socket_getsockopt(struct socket *sock, int level,
int optname)
{
- return sock_has_perm(current, sock->sk, SOCKET__GETOPT);
+ return sock_has_perm(sock->sk, SOCKET__GETOPT);
}
/*
 * Check SOCKET__SHUTDOWN on sock->sk via sock_has_perm().
 * @how is not consulted.
 */
static int selinux_socket_shutdown(struct socket *sock, int how)
{
- return sock_has_perm(current, sock->sk, SOCKET__SHUTDOWN);
+ return sock_has_perm(sock->sk, SOCKET__SHUTDOWN);
}
static int selinux_socket_unix_stream_connect(struct sock *sock,
goto out;
}
- err = sock_has_perm(current, sk, perm);
+ err = sock_has_perm(sk, perm);
out:
return err;
}
return selinux_nlmsg_perm(sk, skb);
}
-static int ipc_alloc_security(struct task_struct *task,
- struct kern_ipc_perm *perm,
+static int ipc_alloc_security(struct kern_ipc_perm *perm,
u16 sclass)
{
struct ipc_security_struct *isec;
- u32 sid;
isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL);
if (!isec)
return -ENOMEM;
- sid = task_sid(task);
isec->sclass = sclass;
- isec->sid = sid;
+ isec->sid = current_sid();
perm->security = isec;
return 0;
u32 sid = current_sid();
int rc;
- rc = ipc_alloc_security(current, &msq->q_perm, SECCLASS_MSGQ);
+ rc = ipc_alloc_security(&msq->q_perm, SECCLASS_MSGQ);
if (rc)
return rc;
case IPC_INFO:
case MSG_INFO:
/* No specific object, just general system-wide information. */
- return task_has_system(current, SYSTEM__IPC_INFO);
+ return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
case IPC_STAT:
case MSG_STAT:
perms = MSGQ__GETATTR | MSGQ__ASSOCIATE;
u32 sid = current_sid();
int rc;
- rc = ipc_alloc_security(current, &shp->shm_perm, SECCLASS_SHM);
+ rc = ipc_alloc_security(&shp->shm_perm, SECCLASS_SHM);
if (rc)
return rc;
case IPC_INFO:
case SHM_INFO:
/* No specific object, just general system-wide information. */
- return task_has_system(current, SYSTEM__IPC_INFO);
+ return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
case IPC_STAT:
case SHM_STAT:
perms = SHM__GETATTR | SHM__ASSOCIATE;
u32 sid = current_sid();
int rc;
- rc = ipc_alloc_security(current, &sma->sem_perm, SECCLASS_SEM);
+ rc = ipc_alloc_security(&sma->sem_perm, SECCLASS_SEM);
if (rc)
return rc;
case IPC_INFO:
case SEM_INFO:
/* No specific object, just general system-wide information. */
- return task_has_system(current, SYSTEM__IPC_INFO);
+ return avc_has_perm(current_sid(), SECINITSID_KERNEL,
+ SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL);
case GETPID:
case GETNCNT:
case GETZCNT:
int error;
unsigned len;
+ rcu_read_lock();
+ __tsec = __task_cred(p)->security;
+
if (current != p) {
- error = current_has_perm(p, PROCESS__GETATTR);
+ error = avc_has_perm(current_sid(), __tsec->sid,
+ SECCLASS_PROCESS, PROCESS__GETATTR, NULL);
if (error)
- return error;
+ goto bad;
}
- rcu_read_lock();
- __tsec = __task_cred(p)->security;
-
if (!strcmp(name, "current"))
sid = __tsec->sid;
else if (!strcmp(name, "prev"))
sid = __tsec->keycreate_sid;
else if (!strcmp(name, "sockcreate"))
sid = __tsec->sockcreate_sid;
- else
- goto invalid;
+ else {
+ error = -EINVAL;
+ goto bad;
+ }
rcu_read_unlock();
if (!sid)
return error;
return len;
-invalid:
+bad:
rcu_read_unlock();
- return -EINVAL;
+ return error;
}
-static int selinux_setprocattr(struct task_struct *p,
- char *name, void *value, size_t size)
+static int selinux_setprocattr(const char *name, void *value, size_t size)
{
struct task_security_struct *tsec;
struct cred *new;
- u32 sid = 0, ptsid;
+ u32 mysid = current_sid(), sid = 0, ptsid;
int error;
char *str = value;
- if (current != p) {
- /* SELinux only allows a process to change its own
- security attributes. */
- return -EACCES;
- }
-
/*
* Basic control over ability to set these attributes at all.
- * current == p, but we'll pass them separately in case the
- * above restriction is ever removed.
*/
if (!strcmp(name, "exec"))
- error = current_has_perm(p, PROCESS__SETEXEC);
+ error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ PROCESS__SETEXEC, NULL);
else if (!strcmp(name, "fscreate"))
- error = current_has_perm(p, PROCESS__SETFSCREATE);
+ error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ PROCESS__SETFSCREATE, NULL);
else if (!strcmp(name, "keycreate"))
- error = current_has_perm(p, PROCESS__SETKEYCREATE);
+ error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ PROCESS__SETKEYCREATE, NULL);
else if (!strcmp(name, "sockcreate"))
- error = current_has_perm(p, PROCESS__SETSOCKCREATE);
+ error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ PROCESS__SETSOCKCREATE, NULL);
else if (!strcmp(name, "current"))
- error = current_has_perm(p, PROCESS__SETCURRENT);
+ error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS,
+ PROCESS__SETCURRENT, NULL);
else
error = -EINVAL;
if (error)
return error;
/* Obtain a SID for the context, if one was specified. */
- if (size && str[1] && str[1] != '\n') {
+ if (size && str[0] && str[0] != '\n') {
if (str[size-1] == '\n') {
str[size-1] = 0;
size--;
} else if (!strcmp(name, "fscreate")) {
tsec->create_sid = sid;
} else if (!strcmp(name, "keycreate")) {
- error = may_create_key(sid, p);
+ error = avc_has_perm(mysid, sid, SECCLASS_KEY, KEY__CREATE,
+ NULL);
if (error)
goto abort_change;
tsec->keycreate_sid = sid;
/* Check for ptracing, and update the task SID if ok.
Otherwise, leave SID unchanged and fail. */
- ptsid = ptrace_parent_sid(p);
+ ptsid = ptrace_parent_sid();
if (ptsid != 0) {
error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS,
PROCESS__PTRACE, NULL);
LSM_HOOK_INIT(task_getscheduler, selinux_task_getscheduler),
LSM_HOOK_INIT(task_movememory, selinux_task_movememory),
LSM_HOOK_INIT(task_kill, selinux_task_kill),
- LSM_HOOK_INIT(task_wait, selinux_task_wait),
LSM_HOOK_INIT(task_to_inode, selinux_task_to_inode),
LSM_HOOK_INIT(ipc_permission, selinux_ipc_permission),
0, SLAB_PANIC, NULL);
avc_init();
- security_add_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks));
+ security_add_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks), "selinux");
if (avc_add_callback(selinux_netcache_avc_callback, AVC_CALLBACK_RESET))
panic("SELinux: Unable to register AVC netcache callback\n");
#define SMK_SENDING 2
#ifdef SMACK_IPV6_PORT_LABELING
+DEFINE_MUTEX(smack_ipv6_lock);
static LIST_HEAD(smk_ipv6_port_list);
#endif
static struct kmem_cache *smack_inode_cache;
struct smack_rule *orp;
int rc = 0;
- INIT_LIST_HEAD(nhead);
-
list_for_each_entry_rcu(orp, ohead, list) {
nrp = kzalloc(sizeof(struct smack_rule), gfp);
if (nrp == NULL) {
struct smack_known_list_elem *nklep;
struct smack_known_list_elem *oklep;
- INIT_LIST_HEAD(nhead);
-
list_for_each_entry(oklep, ohead, list) {
nklep = kzalloc(sizeof(struct smack_known_list_elem), gfp);
if (nklep == NULL) {
isp->smk_task != sbsp->smk_root)
return 0;
- if (bprm->unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) {
+ if (bprm->unsafe & LSM_UNSAFE_PTRACE) {
struct task_struct *tracer;
rc = 0;
}
/**
- * smack_inode_free_security - free an inode blob
+ * smack_inode_free_rcu - Free inode_smack blob from cache
+ * @head: the rcu_head embedded in the inode_smack blob (its smk_rcu member)
+ *
+ * Callback handed to call_rcu() by smack_inode_free_security().
+ * Recovers the enclosing inode_smack from @head and returns it
+ * to smack_inode_cache.
+ */
+static void smack_inode_free_rcu(struct rcu_head *head)
+{
+ struct inode_smack *issp;
+
+ issp = container_of(head, struct inode_smack, smk_rcu);
+ kmem_cache_free(smack_inode_cache, issp);
+}
+
+/**
+ * smack_inode_free_security - free an inode blob using call_rcu()
* @inode: the inode with a blob
*
- * Clears the blob pointer in inode
+ * Queues the blob for release through call_rcu(); the blob pointer
+ * stored in the inode is left in place.
*/
static void smack_inode_free_security(struct inode *inode)
{
- kmem_cache_free(smack_inode_cache, inode->i_security);
- inode->i_security = NULL;
+ struct inode_smack *issp = inode->i_security;
+
+ /*
+ * The inode may still be referenced in a path walk and
+ * a call to smack_inode_permission() can be made
+ * after smack_inode_free_security() is called.
+ * To avoid that race, free the i_security blob via RCU
+ * and leave the current inode->i_security pointer intact.
+ * The inode will be freed after the RCU grace period too.
+ */
+ call_rcu(&issp->smk_rcu, smack_inode_free_rcu);
}
/**
struct smk_audit_info ad;
struct inode *inode = file_inode(file);
+ if (unlikely(IS_PRIVATE(inode)))
+ return 0;
+
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
smk_ad_setfield_u_fs_path(&ad, file->f_path);
int rc;
struct inode *inode = file_inode(file);
+ if (unlikely(IS_PRIVATE(inode)))
+ return 0;
+
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
smk_ad_setfield_u_fs_path(&ad, file->f_path);
rc = smk_curacc(smk_of_inode(inode), MAY_LOCK, &ad);
int rc = 0;
struct inode *inode = file_inode(file);
+ if (unlikely(IS_PRIVATE(inode)))
+ return 0;
+
switch (cmd) {
case F_GETLK:
break;
if (file == NULL)
return 0;
+ if (unlikely(IS_PRIVATE(file_inode(file))))
+ return 0;
+
isp = file_inode(file)->i_security;
if (isp->smk_mmap == NULL)
return 0;
struct smk_audit_info ad;
int rc;
- if (smack_privileged(CAP_MAC_OVERRIDE))
- return 0;
-
smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
smk_ad_setfield_u_fs_path(&ad, file->f_path);
- rc = smk_access(tsp->smk_task, smk_of_inode(inode), MAY_READ, &ad);
+ rc = smk_tskacc(tsp, smk_of_inode(inode), MAY_READ, &ad);
rc = smk_bu_credfile(cred, file, MAY_READ, rc);
return rc;
return rc;
}
-/**
- * smack_task_wait - Smack access check for waiting
- * @p: task to wait for
- *
- * Returns 0
- */
-static int smack_task_wait(struct task_struct *p)
-{
- /*
- * Allow the operation to succeed.
- * Zombies are bad.
- * In userless environments (e.g. phones) programs
- * get marked with SMACK64EXEC and even if the parent
- * and child shouldn't be talking the parent still
- * may expect to know when the child exits.
- */
- return 0;
-}
-
/**
* smack_task_to_inode - copy task smack into the inode blob
* @p: task to copy from
*/
static void smack_sk_free_security(struct sock *sk)
{
+#ifdef SMACK_IPV6_PORT_LABELING
+ struct smk_port_label *spp;
+
+ if (sk->sk_family == PF_INET6) { /* the port list is searched for IPv6 sockets only */
+ rcu_read_lock();
+ list_for_each_entry_rcu(spp, &smk_ipv6_port_list, list) {
+ if (spp->smk_sock != sk)
+ continue;
+ spp->smk_can_reuse = 1; /* flag the entry tied to this socket as reusable */
+ break; /* stop at the first entry matching this socket */
+ }
+ rcu_read_unlock();
+ }
+#endif
kfree(sk->sk_security);
}
* on the bound socket. Take the changes to the port
* as well.
*/
- list_for_each_entry(spp, &smk_ipv6_port_list, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(spp, &smk_ipv6_port_list, list) {
if (sk != spp->smk_sock)
continue;
spp->smk_in = ssp->smk_in;
spp->smk_out = ssp->smk_out;
+ rcu_read_unlock();
return;
}
/*
* A NULL address is only used for updating existing
* bound entries. If there isn't one, it's OK.
*/
+ rcu_read_unlock();
return;
}
* Look for an existing port list entry.
* This is an indication that a port is getting reused.
*/
- list_for_each_entry(spp, &smk_ipv6_port_list, list) {
- if (spp->smk_port != port)
+ rcu_read_lock();
+ list_for_each_entry_rcu(spp, &smk_ipv6_port_list, list) {
+ if (spp->smk_port != port || spp->smk_sock_type != sock->type)
continue;
+ if (spp->smk_can_reuse != 1) {
+ rcu_read_unlock();
+ return;
+ }
spp->smk_port = port;
spp->smk_sock = sk;
spp->smk_in = ssp->smk_in;
spp->smk_out = ssp->smk_out;
+ spp->smk_can_reuse = 0;
+ rcu_read_unlock();
return;
}
-
+ rcu_read_unlock();
/*
* A new port entry is required.
*/
spp->smk_sock = sk;
spp->smk_in = ssp->smk_in;
spp->smk_out = ssp->smk_out;
+ spp->smk_sock_type = sock->type;
+ spp->smk_can_reuse = 0;
- list_add(&spp->list, &smk_ipv6_port_list);
+ mutex_lock(&smack_ipv6_lock);
+ list_add_rcu(&spp->list, &smk_ipv6_port_list);
+ mutex_unlock(&smack_ipv6_lock);
return;
}
return 0;
port = ntohs(address->sin6_port);
- list_for_each_entry(spp, &smk_ipv6_port_list, list) {
- if (spp->smk_port != port)
+ rcu_read_lock();
+ list_for_each_entry_rcu(spp, &smk_ipv6_port_list, list) {
+ if (spp->smk_port != port || spp->smk_sock_type != sk->sk_type)
continue;
object = spp->smk_in;
if (act == SMK_CONNECTING)
ssp->smk_packet = spp->smk_out;
break;
}
+ rcu_read_unlock();
return smk_ipv6_check(skp, object, address, act);
}
case PIPEFS_MAGIC:
isp->smk_inode = smk_of_current();
break;
+ case SOCKFS_MAGIC:
+ /*
+ * Socket access is controlled by the socket
+ * structures associated with the task involved.
+ */
+ isp->smk_inode = &smack_known_star;
+ break;
default:
isp->smk_inode = sbsp->smk_root;
break;
*/
switch (sbp->s_magic) {
case SMACK_MAGIC:
- case PIPEFS_MAGIC:
- case SOCKFS_MAGIC:
case CGROUP_SUPER_MAGIC:
/*
* Casey says that it's a little embarrassing
* that the smack file system doesn't do
* extended attributes.
*
- * Casey says pipes are easy (?)
- *
- * Socket access is controlled by the socket
- * structures associated with the task involved.
- *
* Cgroupfs is special
*/
final = &smack_known_star;
/**
* smack_setprocattr - Smack process attribute setting
- * @p: the object task
* @name: the name of the attribute in /proc/.../attr
* @value: the value to set
* @size: the size of the value
*
* Returns the length of the smack label or an error code
*/
-static int smack_setprocattr(struct task_struct *p, char *name,
- void *value, size_t size)
+static int smack_setprocattr(const char *name, void *value, size_t size)
{
struct task_smack *tsp = current_security();
struct cred *new;
struct smack_known_list_elem *sklep;
int rc;
- /*
- * Changing another process' Smack value is too dangerous
- * and supports no sane use case.
- */
- if (p != current)
- return -EPERM;
-
if (!smack_privileged(CAP_MAC_ADMIN) && list_empty(&tsp->smk_relabel))
return -EPERM;
* ambient value.
*/
rcu_read_lock();
- list_for_each_entry(skp, &smack_known_list, list) {
+ list_for_each_entry_rcu(skp, &smack_known_list, list) {
if (sap->attr.mls.lvl != skp->smk_netlabel.attr.mls.lvl)
continue;
/*
LSM_HOOK_INIT(task_getscheduler, smack_task_getscheduler),
LSM_HOOK_INIT(task_movememory, smack_task_movememory),
LSM_HOOK_INIT(task_kill, smack_task_kill),
- LSM_HOOK_INIT(task_wait, smack_task_wait),
LSM_HOOK_INIT(task_to_inode, smack_task_to_inode),
LSM_HOOK_INIT(ipc_permission, smack_ipc_permission),
/*
* Register with LSM
*/
- security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks));
+ security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks), "smack");
return 0;
}