perf bench futex, requeue: Add --pi parameter
authorDavidlohr Bueso <dave@stgolabs.net>
Mon, 9 Aug 2021 04:33:01 +0000 (21:33 -0700)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Mon, 9 Aug 2021 15:00:27 +0000 (12:00 -0300)
This extends the program to measure WAIT_REQUEUE_PI+CMP_REQUEUE_PI
pairs, which are the underlying machinery behind priority-inheritance
aware condition variables. The defaults are the same as with the regular
non-pi version, requeueing one task at a time, with the exception that
PI will always wakeup the first waiter.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20210809043301.66002-8-dave@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/bench/futex-requeue.c
tools/perf/bench/futex.h

index e23a080..97fe31f 100644 (file)
@@ -6,7 +6,8 @@
  *                on futex2, N at a time.
  *
  * This program is particularly useful to measure the latency of nthread
- * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
+ * requeues without waking up any tasks (in the non-pi case) -- thus
+ * mimicking a regular futex_wait.
  */
 
 /* For the CLR_() macros */
@@ -54,6 +55,8 @@ static const struct option options[] = {
        OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
        OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
        OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
+       OPT_BOOLEAN( 'p', "pi", &params.pi, "Use PI-aware variants of FUTEX_CMP_REQUEUE"),
+
        OPT_END()
 };
 
@@ -87,14 +90,30 @@ static void *workerfn(void *arg __maybe_unused)
        pthread_mutex_unlock(&thread_lock);
 
        while (1) {
-               ret = futex_wait(&futex1, 0, NULL, futex_flag);
-               if (!ret)
-                       break;
-
-               if (ret && errno != EAGAIN) {
-                       if (!params.silent)
-                               warn("futex_wait");
-                       break;
+               if (!params.pi) {
+                       ret = futex_wait(&futex1, 0, NULL, futex_flag);
+                       if (!ret)
+                               break;
+
+                       if (ret && errno != EAGAIN) {
+                               if (!params.silent)
+                                       warnx("futex_wait");
+                               break;
+                       }
+               } else {
+                       ret = futex_wait_requeue_pi(&futex1, 0, &futex2,
+                                                   NULL, futex_flag);
+                       if (!ret) {
+                               /* got the lock at futex2 */
+                               futex_unlock_pi(&futex2, futex_flag);
+                               break;
+                       }
+
+                       if (ret && errno != EAGAIN) {
+                               if (!params.silent)
+                                       warnx("futex_wait_requeue_pi");
+                               break;
+                       }
                }
        }
 
@@ -171,9 +190,10 @@ int bench_futex_requeue(int argc, const char **argv)
        if (params.broadcast)
                params.nrequeue = params.nthreads;
 
-       printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
+       printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
               "%d at a time.\n\n",  getpid(), params.nthreads,
-              params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue);
+              params.fshared ? "shared":"private", &futex1,
+              params.pi ? "PI ": "", &futex2, params.nrequeue);
 
        init_stats(&requeued_stats);
        init_stats(&requeuetime_stats);
@@ -183,7 +203,7 @@ int bench_futex_requeue(int argc, const char **argv)
        pthread_cond_init(&thread_worker, NULL);
 
        for (j = 0; j < bench_repeat && !done; j++) {
-               unsigned int nrequeued = 0;
+               unsigned int nrequeued = 0, wakeups = 0;
                struct timeval start, end, runtime;
 
                /* create, launch & block all threads */
@@ -201,13 +221,30 @@ int bench_futex_requeue(int argc, const char **argv)
                /* Ok, all threads are patiently blocked, start requeueing */
                gettimeofday(&start, NULL);
                while (nrequeued < params.nthreads) {
+                       int r;
+
                        /*
-                        * Do not wakeup any tasks blocked on futex1, allowing
-                        * us to really measure futex_wait functionality.
+                        * For the regular non-pi case, do not wakeup any tasks
+                        * blocked on futex1, allowing us to really measure
+                        * futex_wait functionality. For the PI case the first
+                        * waiter is always awoken.
                         */
-                       nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
-                                                      params.nrequeue,
-                                                      futex_flag);
+                       if (!params.pi) {
+                               r = futex_cmp_requeue(&futex1, 0, &futex2, 0,
+                                                     params.nrequeue,
+                                                     futex_flag);
+                       } else {
+                               r = futex_cmp_requeue_pi(&futex1, 0, &futex2,
+                                                        params.nrequeue,
+                                                        futex_flag);
+                               wakeups++; /* assume no error */
+                       }
+
+                       if (r < 0)
+                               err(EXIT_FAILURE, "couldn't requeue from %p to %p",
+                                   &futex1, &futex2);
+
+                       nrequeued += r;
                }
 
                gettimeofday(&end, NULL);
@@ -217,16 +254,29 @@ int bench_futex_requeue(int argc, const char **argv)
                update_stats(&requeuetime_stats, runtime.tv_usec);
 
                if (!params.silent) {
-                       printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
-                              j + 1, nrequeued, params.nthreads,
-                              runtime.tv_usec / (double)USEC_PER_MSEC);
+                       if (!params.pi)
+                               printf("[Run %d]: Requeued %d of %d threads in "
+                                      "%.4f ms\n", j + 1, nrequeued,
+                                      params.nthreads,
+                                      runtime.tv_usec / (double)USEC_PER_MSEC);
+                       else {
+                               nrequeued -= wakeups;
+                               printf("[Run %d]: Awoke and Requeued (%d+%d) of "
+                                      "%d threads in %.4f ms\n",
+                                      j + 1, wakeups, nrequeued,
+                                      params.nthreads,
+                                      runtime.tv_usec / (double)USEC_PER_MSEC);
+                       }
+
                }
 
-               /* everybody should be blocked on futex2, wake'em up */
-               nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
-               if (params.nthreads != nrequeued)
-                       warnx("couldn't wakeup all tasks (%d/%d)",
-                             nrequeued, params.nthreads);
+               if (!params.pi) {
+                       /* everybody should be blocked on futex2, wake'em up */
+                       nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
+                       if (params.nthreads != nrequeued)
+                               warnx("couldn't wakeup all tasks (%d/%d)",
+                                     nrequeued, params.nthreads);
+               }
 
                for (i = 0; i < params.nthreads; i++) {
                        ret = pthread_join(worker[i], NULL);
index 40a89f1..b3853aa 100644 (file)
@@ -18,6 +18,7 @@ struct bench_futex_parameters {
        bool fshared;
        bool mlockall;
        bool multi; /* lock-pi */
+       bool pi; /* requeue-pi */
        bool broadcast; /* requeue */
        unsigned int runtime; /* seconds*/
        unsigned int nthreads;
@@ -90,7 +91,7 @@ futex_unlock_pi(u_int32_t *uaddr, int opflags)
 /**
 * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
 * @nr_wake:        wake up to this many tasks
-* @nr_requeue:        requeue up to this many tasks
+* @nr_requeue:     requeue up to this many tasks
 */
 static inline int
 futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
@@ -99,4 +100,38 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
        return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
                 val, opflags);
 }
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr:     non-PI futex source
+ * @uaddr2:    PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
+                     struct timespec *timeout, int opflags)
+{
+       return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+                    opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2
+ * @uaddr:     non-PI futex source
+ * @uaddr2:    PI futex target
+ * @nr_requeue:        requeue up to this many tasks
+ *
+ * This is the second half of the requeue_pi mechanism. It shall always be
+ * paired with futex_wait_requeue_pi(). The first waker is always awoken.
+ */
+static inline int
+futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
+                    int nr_requeue, int opflags)
+{
+       return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2,
+                    val, opflags);
+}
+
 #endif /* _FUTEX_H */