X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=fs%2Feventpoll.c;h=a829af074eb58ec1ec0fe7c46533d3a0269bb851;hb=e7dd91c456a8cdbcd7066997d15e36d14276a949;hp=d8eef21575871560f42a9eefa19951f58c220751;hpb=e411596d48b5b77632deb91afcbc3185b9b658cb;p=linux-2.6-microblaze.git

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d8eef2157587..a829af074eb5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -389,19 +389,24 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
  *
  * we must do our busy polling with irqs enabled
  */
-static void ep_busy_loop(struct eventpoll *ep, int nonblock)
+static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
 {
 	unsigned int napi_id = READ_ONCE(ep->napi_id);
 
-	if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
+	if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) {
 		napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false,
 			       BUSY_POLL_BUDGET);
-}
-
-static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
-{
-	if (ep->napi_id)
+		if (ep_events_available(ep))
+			return true;
+		/*
+		 * Busy poll timed out. Drop NAPI ID for now, we can add
+		 * it back in when we have moved a socket with a valid NAPI
+		 * ID onto the ready list.
+		 */
 		ep->napi_id = 0;
+		return false;
+	}
+	return false;
 }
 
 /*
@@ -441,12 +446,9 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
 
 #else
 
-static inline void ep_busy_loop(struct eventpoll *ep, int nonblock)
-{
-}
-
-static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
+static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
 {
+	return false;
 }
 
 static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
@@ -1710,15 +1712,25 @@ static int ep_send_events(struct eventpoll *ep,
 	return res;
 }
 
-static inline struct timespec64 ep_set_mstimeout(long ms)
+static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms)
 {
-	struct timespec64 now, ts = {
-		.tv_sec = ms / MSEC_PER_SEC,
-		.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
-	};
+	struct timespec64 now;
+
+	if (ms < 0)
+		return NULL;
+
+	if (!ms) {
+		to->tv_sec = 0;
+		to->tv_nsec = 0;
+		return to;
+	}
+
+	to->tv_sec = ms / MSEC_PER_SEC;
+	to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC);
 
 	ktime_get_ts64(&now);
-	return timespec64_add_safe(now, ts);
+	*to = timespec64_add_safe(now, *to);
+	return to;
 }
 
 /**
@@ -1730,8 +1742,8 @@ static inline struct timespec64 ep_set_mstimeout(long ms)
  *          stored.
  * @maxevents: Size (in terms of number of events) of the caller event buffer.
  * @timeout: Maximum timeout for the ready events fetch operation, in
- *           milliseconds. If the @timeout is zero, the function will not block,
- *           while if the @timeout is less than zero, the function will block
+ *           timespec. If the timeout is zero, the function will not block,
+ *           while if the @timeout ptr is NULL, the function will block
  *           until at least one event has been retrieved (or an error
  *           occurred).
 *
@@ -1739,7 +1751,7 @@ static inline struct timespec64 ep_set_mstimeout(long ms)
  *          error code, in case of error.
  */
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
-		   int maxevents, long timeout)
+		   int maxevents, struct timespec64 *timeout)
 {
 	int res, eavail, timed_out = 0;
 	u64 slack = 0;
@@ -1748,46 +1760,47 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 
 	lockdep_assert_irqs_enabled();
 
-	if (timeout > 0) {
-		struct timespec64 end_time = ep_set_mstimeout(timeout);
-
-		slack = select_estimate_accuracy(&end_time);
+	if (timeout && (timeout->tv_sec | timeout->tv_nsec)) {
+		slack = select_estimate_accuracy(timeout);
 		to = &expires;
-		*to = timespec64_to_ktime(end_time);
-	} else if (timeout == 0) {
+		*to = timespec64_to_ktime(*timeout);
+	} else if (timeout) {
 		/*
 		 * Avoid the unnecessary trip to the wait queue loop, if the
-		 * caller specified a non blocking operation. We still need
-		 * lock because we could race and not see an epi being added
-		 * to the ready list while in irq callback. Thus incorrectly
-		 * returning 0 back to userspace.
+		 * caller specified a non blocking operation.
 		 */
 		timed_out = 1;
-
-		write_lock_irq(&ep->lock);
-		eavail = ep_events_available(ep);
-		write_unlock_irq(&ep->lock);
-
-		goto send_events;
 	}
 
-fetch_events:
+	/*
+	 * This call is racy: We may or may not see events that are being added
+	 * to the ready list under the lock (e.g., in IRQ callbacks). For cases
+	 * with a non-zero timeout, this thread will check the ready list under
+	 * lock and will add itself to the wait queue. For cases with a zero
+	 * timeout, the user by definition should not care and will have to
+	 * recheck again.
+	 */
+	eavail = ep_events_available(ep);
 
-	if (!ep_events_available(ep))
-		ep_busy_loop(ep, timed_out);
+	while (1) {
+		if (eavail) {
+			/*
+			 * Try to transfer events to user space. In case we get
+			 * 0 events and there's still timeout left over, we go
+			 * trying again in search of more luck.
+			 */
+			res = ep_send_events(ep, events, maxevents);
+			if (res)
+				return res;
+		}
 
-	eavail = ep_events_available(ep);
-	if (eavail)
-		goto send_events;
+		if (timed_out)
+			return 0;
 
-	/*
-	 * Busy poll timed out. Drop NAPI ID for now, we can add
-	 * it back in when we have moved a socket with a valid NAPI
-	 * ID onto the ready list.
-	 */
-	ep_reset_busy_poll_napi_id(ep);
+		eavail = ep_busy_loop(ep, timed_out);
+		if (eavail)
+			continue;
 
-	do {
 		if (signal_pending(current))
 			return -EINTR;
 
@@ -1836,33 +1849,22 @@ fetch_events:
 		 * carefully under lock, below.
 		 */
 		eavail = 1;
-	} while (0);
 
-	if (!list_empty_careful(&wait.entry)) {
-		write_lock_irq(&ep->lock);
-		/*
-		 * If the thread timed out and is not on the wait queue, it
-		 * means that the thread was woken up after its timeout expired
-		 * before it could reacquire the lock. Thus, when wait.entry is
-		 * empty, it needs to harvest events.
-		 */
-		if (timed_out)
-			eavail = list_empty(&wait.entry);
-		__remove_wait_queue(&ep->wq, &wait);
-		write_unlock_irq(&ep->lock);
+		if (!list_empty_careful(&wait.entry)) {
+			write_lock_irq(&ep->lock);
+			/*
+			 * If the thread timed out and is not on the wait queue,
+			 * it means that the thread was woken up after its
+			 * timeout expired before it could reacquire the lock.
+			 * Thus, when wait.entry is empty, it needs to harvest
+			 * events.
+			 */
+			if (timed_out)
+				eavail = list_empty(&wait.entry);
+			__remove_wait_queue(&ep->wq, &wait);
+			write_unlock_irq(&ep->lock);
+		}
 	}
-
-send_events:
-	/*
-	 * Try to transfer events to user space. In case we get 0 events and
-	 * there's still timeout left over, we go trying again in search of
-	 * more luck.
-	 */
-	if (eavail &&
-	    !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
-		goto fetch_events;
-
-	return res;
 }
 
 /**
@@ -2181,7 +2183,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
  * part of the user space epoll_wait(2).
  */
 static int do_epoll_wait(int epfd, struct epoll_event __user *events,
-			 int maxevents, int timeout)
+			 int maxevents, struct timespec64 *to)
 {
 	int error;
 	struct fd f;
@@ -2215,7 +2217,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
 	ep = f.file->private_data;
 
 	/* Time to fish for events ... */
-	error = ep_poll(ep, events, maxevents, timeout);
+	error = ep_poll(ep, events, maxevents, to);
 
 error_fput:
 	fdput(f);
@@ -2225,16 +2227,19 @@ error_fput:
 SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
 		int, maxevents, int, timeout)
 {
-	return do_epoll_wait(epfd, events, maxevents, timeout);
+	struct timespec64 to;
+
+	return do_epoll_wait(epfd, events, maxevents,
+			     ep_timeout_to_timespec(&to, timeout));
 }
 
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
  * part of the user space epoll_pwait(2).
  */
-SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
-		int, maxevents, int, timeout, const sigset_t __user *, sigmask,
-		size_t, sigsetsize)
+static int do_epoll_pwait(int epfd, struct epoll_event __user *events,
+			  int maxevents, struct timespec64 *to,
+			  const sigset_t __user *sigmask, size_t sigsetsize)
 {
 	int error;
 
@@ -2246,18 +2251,47 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 	if (error)
 		return error;
 
-	error = do_epoll_wait(epfd, events, maxevents, timeout);
+	error = do_epoll_wait(epfd, events, maxevents, to);
+
 	restore_saved_sigmask_unless(error == -EINTR);
 
 	return error;
 }
 
+SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
+		int, maxevents, int, timeout, const sigset_t __user *, sigmask,
+		size_t, sigsetsize)
+{
+	struct timespec64 to;
+
+	return do_epoll_pwait(epfd, events, maxevents,
+			      ep_timeout_to_timespec(&to, timeout),
+			      sigmask, sigsetsize);
+}
+
+SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events,
+		int, maxevents, const struct __kernel_timespec __user *, timeout,
+		const sigset_t __user *, sigmask, size_t, sigsetsize)
+{
+	struct timespec64 ts, *to = NULL;
+
+	if (timeout) {
+		if (get_timespec64(&ts, timeout))
+			return -EFAULT;
+		to = &ts;
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+			return -EINVAL;
+	}
+
+	return do_epoll_pwait(epfd, events, maxevents, to,
+			      sigmask, sigsetsize);
+}
+
 #ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
-		       struct epoll_event __user *, events,
-		       int, maxevents, int, timeout,
-		       const compat_sigset_t __user *, sigmask,
-		       compat_size_t, sigsetsize)
+static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events,
+				 int maxevents, struct timespec64 *timeout,
+				 const compat_sigset_t __user *sigmask,
+				 compat_size_t sigsetsize)
 {
 	long err;
 
@@ -2270,10 +2304,46 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 		return err;
 
 	err = do_epoll_wait(epfd, events, maxevents, timeout);
+
 	restore_saved_sigmask_unless(err == -EINTR);
 
 	return err;
 }
+
+COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
+		       struct epoll_event __user *, events,
+		       int, maxevents, int, timeout,
+		       const compat_sigset_t __user *, sigmask,
+		       compat_size_t, sigsetsize)
+{
+	struct timespec64 to;
+
+	return do_compat_epoll_pwait(epfd, events, maxevents,
+				     ep_timeout_to_timespec(&to, timeout),
+				     sigmask, sigsetsize);
+}
+
+COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd,
+		       struct epoll_event __user *, events,
+		       int, maxevents,
+		       const struct __kernel_timespec __user *, timeout,
+		       const compat_sigset_t __user *, sigmask,
+		       compat_size_t, sigsetsize)
+{
+	struct timespec64 ts, *to = NULL;
+
+	if (timeout) {
+		if (get_timespec64(&ts, timeout))
+			return -EFAULT;
+		to = &ts;
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+			return -EINVAL;
+	}
+
+	return do_compat_epoll_pwait(epfd, events, maxevents, to,
+				     sigmask, sigsetsize);
+}
+
 #endif
 
 static int __init eventpoll_init(void)
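
Not part of the patch itself: the sketch below illustrates how user space might exercise the new epoll_pwait2 entry point added above, passing a struct timespec instead of epoll_wait(2)'s integer milliseconds. It is a minimal, assumption-laden sketch: it invokes the raw syscall via syscall(2) in case libc has no wrapper, assumes syscall number 441 (guarded by #ifndef in case the headers already define __NR_epoll_pwait2), and assumes the C library's struct timespec has the same layout as the kernel's __kernel_timespec, which holds on builds with 64-bit time_t.

/* epoll_pwait2 usage sketch (illustrative; not from the patch). */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/epoll.h>
#include <sys/syscall.h>
#include <time.h>
#include <unistd.h>

#ifndef __NR_epoll_pwait2
#define __NR_epoll_pwait2 441	/* assumption: number used by the common syscall table */
#endif

int main(void)
{
	struct epoll_event ev = { .events = EPOLLIN, .data.fd = STDIN_FILENO };
	struct epoll_event out;
	/*
	 * 250 ms expressed with nanosecond resolution. A NULL timeout pointer
	 * would block indefinitely and a zeroed timespec returns at once,
	 * mirroring the ms < 0 and ms == 0 cases of ep_timeout_to_timespec().
	 */
	struct timespec to = { .tv_sec = 0, .tv_nsec = 250 * 1000 * 1000 };
	int epfd = epoll_create1(0);
	long n;

	if (epfd < 0 || epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0) {
		perror("epoll setup");
		return 1;
	}

	/*
	 * sigmask == NULL and sigsetsize == 0: behave like plain epoll_wait().
	 * A real caller would pass a signal mask plus the kernel's sigset
	 * size, as the other rt-sigmask syscalls do.
	 */
	n = syscall(__NR_epoll_pwait2, epfd, &out, 1, &to, NULL, (size_t)0);
	if (n < 0)
		perror("epoll_pwait2");
	else
		printf("%ld event(s) ready\n", n);

	close(epfd);
	return 0;
}

The timespec-based interface is the point of the series: epoll_wait(2) and epoll_pwait(2) remain limited to millisecond granularity and are now funnelled through ep_timeout_to_timespec(), while epoll_pwait2 passes the caller's timeout through get_timespec64() and poll_select_set_timeout() with nanosecond resolution.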