X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=ipc%2Fmqueue.c;h=49a05ba3000dbab9f6821e3deb3a5223c29a528a;hb=b27a939e8376a3f1ed09b9c33ef44d20f18ec3d0;hp=3d920ff15c80dd26532c72c0c3dea77ef6557a9e;hpb=8a86b00a437ec06b298477463c7a9b8774570507;p=linux-2.6-microblaze.git diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 3d920ff15c80..49a05ba3000d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -63,6 +63,66 @@ struct posix_msg_tree_node { int priority; }; +/* + * Locking: + * + * Accesses to a message queue are synchronized by acquiring info->lock. + * + * There are two notable exceptions: + * - The actual wakeup of a sleeping task is performed using the wake_q + * framework. info->lock is already released when wake_up_q is called. + * - The exit codepaths after sleeping check ext_wait_queue->state without + * any locks. If it is STATE_READY, then the syscall is completed without + * acquiring info->lock. + * + * MQ_BARRIER: + * To achieve proper release/acquire memory barrier pairing, the state is set to + * STATE_READY with smp_store_release(), and it is read with READ_ONCE followed + * by smp_acquire__after_ctrl_dep(). In addition, wake_q_add_safe() is used. + * + * This prevents the following races: + * + * 1) With the simple wake_q_add(), the task could be gone already before + * the increase of the reference happens + * Thread A + * Thread B + * WRITE_ONCE(wait.state, STATE_NONE); + * schedule_hrtimeout() + * wake_q_add(A) + * if (cmpxchg()) // success + * ->state = STATE_READY (reordered) + * + * if (wait.state == STATE_READY) return; + * sysret to user space + * sys_exit() + * get_task_struct() // UaF + * + * Solution: Use wake_q_add_safe() and perform the get_task_struct() before + * the smp_store_release() that does ->state = STATE_READY. + * + * 2) Without proper _release/_acquire barriers, the woken up task + * could read stale data + * + * Thread A + * Thread B + * do_mq_timedreceive + * WRITE_ONCE(wait.state, STATE_NONE); + * schedule_hrtimeout() + * state = STATE_READY; + * + * if (wait.state == STATE_READY) return; + * msg_ptr = wait.msg; // Access to stale data! + * receiver->msg = message; (reordered) + * + * Solution: use _release and _acquire barriers. + * + * 3) There is intentionally no barrier when setting current->state + * to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the + * release memory barrier, and the wakeup is triggered when holding + * info->lock, i.e. spin_lock(&info->lock) provided a pairing + * acquire memory barrier. + */ + struct ext_wait_queue { /* queue of sleeping tasks */ struct task_struct *task; struct list_head list; @@ -646,18 +706,23 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr, wq_add(info, sr, ewp); for (;;) { + /* memory barrier not required, we hold info->lock */ __set_current_state(TASK_INTERRUPTIBLE); spin_unlock(&info->lock); time = schedule_hrtimeout_range_clock(timeout, 0, HRTIMER_MODE_ABS, CLOCK_REALTIME); - if (ewp->state == STATE_READY) { + if (READ_ONCE(ewp->state) == STATE_READY) { + /* see MQ_BARRIER for purpose/pairing */ + smp_acquire__after_ctrl_dep(); retval = 0; goto out; } spin_lock(&info->lock); - if (ewp->state == STATE_READY) { + + /* we hold info->lock, so no memory barrier required */ + if (READ_ONCE(ewp->state) == STATE_READY) { retval = 0; goto out_unlock; } @@ -918,6 +983,18 @@ out_name: * The same algorithm is used for senders. */ +static inline void __pipelined_op(struct wake_q_head *wake_q, + struct mqueue_inode_info *info, + struct ext_wait_queue *this) +{ + list_del(&this->list); + get_task_struct(this->task); + + /* see MQ_BARRIER for purpose/pairing */ + smp_store_release(&this->state, STATE_READY); + wake_q_add_safe(wake_q, this->task); +} + /* pipelined_send() - send a message directly to the task waiting in * sys_mq_timedreceive() (without inserting message into a queue). */ @@ -927,17 +1004,7 @@ static inline void pipelined_send(struct wake_q_head *wake_q, struct ext_wait_queue *receiver) { receiver->msg = message; - list_del(&receiver->list); - wake_q_add(wake_q, receiver->task); - /* - * Rely on the implicit cmpxchg barrier from wake_q_add such - * that we can ensure that updating receiver->state is the last - * write operation: As once set, the receiver can continue, - * and if we don't have the reference count from the wake_q, - * yet, at that point we can later have a use-after-free - * condition and bogus wakeup. - */ - receiver->state = STATE_READY; + __pipelined_op(wake_q, info, receiver); } /* pipelined_receive() - if there is task waiting in sys_mq_timedsend() @@ -955,9 +1022,7 @@ static inline void pipelined_receive(struct wake_q_head *wake_q, if (msg_insert(sender->msg, info)) return; - list_del(&sender->list); - wake_q_add(wake_q, sender->task); - sender->state = STATE_READY; + __pipelined_op(wake_q, info, sender); } static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, @@ -1044,7 +1109,9 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, } else { wait.task = current; wait.msg = (void *) msg_ptr; - wait.state = STATE_NONE; + + /* memory barrier not required, we hold info->lock */ + WRITE_ONCE(wait.state, STATE_NONE); ret = wq_sleep(info, SEND, timeout, &wait); /* * wq_sleep must be called with info->lock held, and @@ -1147,7 +1214,9 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, ret = -EAGAIN; } else { wait.task = current; - wait.state = STATE_NONE; + + /* memory barrier not required, we hold info->lock */ + WRITE_ONCE(wait.state, STATE_NONE); ret = wq_sleep(info, RECV, timeout, &wait); msg_ptr = wait.msg; }