netfilter: conntrack: Pass value of ctinfo to __nf_conntrack_update
[linux-2.6-microblaze.git] / fs / eventpoll.c
index 8c59664..12eebcd 100644 (file)
@@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi)
 {
        struct eventpoll *ep = epi->ep;
 
+       /* Fast preliminary check */
+       if (epi->next != EP_UNACTIVE_PTR)
+               return false;
+
        /* Check that the same epi has not been just chained from another CPU */
        if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
                return false;
@@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
         * chained in ep->ovflist and requeued later on.
         */
        if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
-               if (epi->next == EP_UNACTIVE_PTR &&
-                   chain_epi_lockless(epi))
+               if (chain_epi_lockless(epi))
+                       ep_pm_stay_awake_rcu(epi);
+       } else if (!ep_is_linked(epi)) {
+               /* In the usual case, add event to ready list. */
+               if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
                        ep_pm_stay_awake_rcu(epi);
-               goto out_unlock;
-       }
-
-       /* If this file is already in the ready list we exit soon */
-       if (!ep_is_linked(epi) &&
-           list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) {
-               ep_pm_stay_awake_rcu(epi);
        }
 
        /*
@@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 {
        int res = 0, eavail, timed_out = 0;
        u64 slack = 0;
-       bool waiter = false;
        wait_queue_entry_t wait;
        ktime_t expires, *to = NULL;
 
@@ -1867,55 +1866,75 @@ fetch_events:
         */
        ep_reset_busy_poll_napi_id(ep);
 
-       /*
-        * We don't have any available event to return to the caller.  We need
-        * to sleep here, and we will be woken by ep_poll_callback() when events
-        * become available.
-        */
-       if (!waiter) {
-               waiter = true;
-               init_waitqueue_entry(&wait, current);
+       do {
+               /*
+                * Internally init_wait() uses autoremove_wake_function(),
+                * thus wait entry is removed from the wait queue on each
+                * wakeup. Why it is important? In case of several waiters
+                * each new wakeup will hit the next waiter, giving it the
+                * chance to harvest new event. Otherwise wakeup can be
+                * lost. This is also good performance-wise, because on
+                * normal wakeup path no need to call __remove_wait_queue()
+                * explicitly, thus ep->lock is not taken, which halts the
+                * event delivery.
+                */
+               init_wait(&wait);
 
                write_lock_irq(&ep->lock);
-               __add_wait_queue_exclusive(&ep->wq, &wait);
-               write_unlock_irq(&ep->lock);
-       }
-
-       for (;;) {
                /*
-                * We don't want to sleep if the ep_poll_callback() sends us
-                * a wakeup in between. That's why we set the task state
-                * to TASK_INTERRUPTIBLE before doing the checks.
+                * Barrierless variant, waitqueue_active() is called under
+                * the same lock on wakeup ep_poll_callback() side, so it
+                * is safe to avoid an explicit barrier.
                 */
-               set_current_state(TASK_INTERRUPTIBLE);
+               __set_current_state(TASK_INTERRUPTIBLE);
+
                /*
-                * Always short-circuit for fatal signals to allow
-                * threads to make a timely exit without the chance of
-                * finding more events available and fetching
-                * repeatedly.
+                * Do the final check under the lock. ep_scan_ready_list()
+                * plays with two lists (->rdllist and ->ovflist) and there
+                * is always a race when both lists are empty for short
+                * period of time although events are pending, so lock is
+                * important.
                 */
-               if (fatal_signal_pending(current)) {
-                       res = -EINTR;
-                       break;
+               eavail = ep_events_available(ep);
+               if (!eavail) {
+                       if (signal_pending(current))
+                               res = -EINTR;
+                       else
+                               __add_wait_queue_exclusive(&ep->wq, &wait);
                }
+               write_unlock_irq(&ep->lock);
 
-               eavail = ep_events_available(ep);
-               if (eavail)
+               if (eavail || res)
                        break;
-               if (signal_pending(current)) {
-                       res = -EINTR;
-                       break;
-               }
 
                if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
                        timed_out = 1;
                        break;
                }
-       }
+
+               /* We were woken up, thus go and try to harvest some events */
+               eavail = 1;
+
+       } while (0);
 
        __set_current_state(TASK_RUNNING);
 
+       if (!list_empty_careful(&wait.entry)) {
+               write_lock_irq(&ep->lock);
+               __remove_wait_queue(&ep->wq, &wait);
+               write_unlock_irq(&ep->lock);
+       }
+
 send_events:
+       if (fatal_signal_pending(current)) {
+               /*
+                * Always short-circuit for fatal signals to allow
+                * threads to make a timely exit without the chance of
+                * finding more events available and fetching
+                * repeatedly.
+                */
+               res = -EINTR;
+       }
        /*
         * Try to transfer events to user space. In case we get 0 events and
         * there's still timeout left over, we go trying again in search of
@@ -1925,12 +1944,6 @@ send_events:
            !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
                goto fetch_events;
 
-       if (waiter) {
-               write_lock_irq(&ep->lock);
-               __remove_wait_queue(&ep->wq, &wait);
-               write_unlock_irq(&ep->lock);
-       }
-
        return res;
 }