#include "internal.h"
+/*
+ * New pipe buffers will be restricted to this size while the user is exceeding
+ * their pipe buffer quota. The general pipe use case needs at least two
+ * buffers: one for data yet to be read, and one for new data. If this is less
+ * than two, then a write to a non-empty pipe may block even if the pipe is not
+ * full. This can occur with GNU make jobserver or similar uses of pipes as
+ * semaphores: multiple processes may be waiting to write tokens back to the
+ * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
+ *
+ * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
+ * own risk, namely: pipe writes to non-full pipes may block until the pipe is
+ * emptied.
+ */
+#define PIPE_MIN_DEF_BUFFERS 2
+
/*
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
#endif
/*
- * Epoll nonsensically wants a wakeup whether the pipe
- * was already empty or not.
- *
* If it wasn't empty we try to merge new data into
* the last buffer.
*
* spanning multiple pages.
*/
head = pipe->head;
- was_empty = true;
+ was_empty = pipe_empty(head, pipe->tail);
chars = total_len & (PAGE_SIZE-1);
- if (chars && !pipe_empty(head, pipe->tail)) {
+ if (chars && !was_empty) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len;
* This is particularly important for small writes, because of
* how (for example) the GNU make jobserver uses small writes to
* wake up pending jobs
+ *
+ * Epoll nonsensically wants a wakeup whether the pipe
+ * was already empty or not.
*/
- if (was_empty) {
+ if (was_empty || pipe->poll_usage) {
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
struct pipe_inode_info *pipe = filp->private_data;
unsigned int head, tail;
+ /* Epoll has some historical nasty semantics, this enables them */
+ pipe->poll_usage = 1;
+
/*
* Reading pipe state only -- no need for acquiring the semaphore.
*
user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
- user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
- pipe_bufs = 1;
+ user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
+ pipe_bufs = PIPE_MIN_DEF_BUFFERS;
}
if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())