membarrier: Provide register expedited private command

author Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Thu, 19 Oct 2017 17:30:15 +0000 (13:30 -0400)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 20 Oct 2017 02:13:40 +0000 (22:13 -0400)
diff --git a/fs/exec.c b/fs/exec.c

index 5470d3c..3e14ba2 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1802,6 +1802,7 @@ static int do_execveat_common(int fd, struct filename *filename,
         /* execve succeeded */
         current->fs->in_exec = 0;
         current->in_execve = 0;
+       membarrier_execve(current);
         acct_update_integrals(current);
         task_numa_free(current);
         free_bprm(bprm);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 46f4ecf..1861ea8 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -445,6 +445,9 @@ struct mm_struct {
         unsigned long flags; /* Must use atomic bitops to access the bits */
  
         struct core_state *core_state; /* coredumping support */
+#ifdef CONFIG_MEMBARRIER
+       atomic_t membarrier_state;
+#endif
  #ifdef CONFIG_AIO
         spinlock_t                      ioctx_lock;
         struct kioctx_table __rcu       *ioctx_table;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h

index ae53e41..ab9bf7b 100644 (file)
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
         current->flags = (current->flags & ~PF_MEMALLOC) | flags;
  }
  
+#ifdef CONFIG_MEMBARRIER
+enum {  /* flag bits; READY is the one tested and set in mm->membarrier_state */
+       MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY        = (1U << 0), /* mm has registered for PRIVATE_EXPEDITED */
+       MEMBARRIER_STATE_SWITCH_MM                      = (1U << 1), /* no user in the visible hunks -- TODO confirm purpose */
+};
+
+static inline void membarrier_execve(struct task_struct *t)
+{      /* Called by do_execveat_common() on the "execve succeeded" path. */
+       atomic_set(&t->mm->membarrier_state, 0); /* clear every flag, including the expedited registration */
+}
+#else
+static inline void membarrier_execve(struct task_struct *t)
+{      /* !CONFIG_MEMBARRIER: mm_struct has no membarrier_state, nothing to reset. */
+}
+#endif
+
  #endif /* _LINUX_SCHED_MM_H */
diff --git a/include/uapi/linux/membarrier.h b/include/uapi/linux/membarrier.h

index 6d47b32..4e01ad7 100644 (file)
--- a/include/uapi/linux/membarrier.h
+++ b/include/uapi/linux/membarrier.h
@@ -52,21 +52,30 @@
   *                          (non-running threads are de facto in such a
   *                          state). This only covers threads from the
   *                          same processes as the caller thread. This
- *                          command returns 0. The "expedited" commands
- *                          complete faster than the non-expedited ones,
- *                          they never block, but have the downside of
- *                          causing extra overhead.
+ *                          command returns 0 on success. The
+ *                          "expedited" commands complete faster than
+ *                          the non-expedited ones, they never block,
+ *                          but have the downside of causing extra
+ *                          overhead. A process needs to register its
+ *                          intent to use the private expedited command
+ *                          prior to using it, otherwise this command
+ *                          returns -EPERM.
+ * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+ *                          Register the process intent to use
+ *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
+ *                          returns 0.
   *
   * Command to be passed to the membarrier system call. The commands need to
   * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
   * the value 0.
   */
  enum membarrier_cmd {
-       MEMBARRIER_CMD_QUERY                    = 0,
-       MEMBARRIER_CMD_SHARED                   = (1 << 0),
+       MEMBARRIER_CMD_QUERY                            = 0,
+       MEMBARRIER_CMD_SHARED                           = (1 << 0),
         /* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
         /* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
-       MEMBARRIER_CMD_PRIVATE_EXPEDITED        = (1 << 3),
+       MEMBARRIER_CMD_PRIVATE_EXPEDITED                = (1 << 3),
+       MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED       = (1 << 4),
  };
  
  #endif /* _UAPI_LINUX_MEMBARRIER_H */
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c

index a92fddc..dd79087 100644 (file)
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -18,6 +18,7 @@
  #include <linux/membarrier.h>
  #include <linux/tick.h>
  #include <linux/cpumask.h>
+#include <linux/atomic.h>
  
  #include "sched.h"     /* for cpu_rq(). */
  
@@ -26,21 +27,26 @@
   * except MEMBARRIER_CMD_QUERY.
   */
  #define MEMBARRIER_CMD_BITMASK \
-       (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED)
+       (MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED       \
+       | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
  
  static void ipi_mb(void *info)
  {
         smp_mb();       /* IPIs should be serializing but paranoid. */
  }
  
-static void membarrier_private_expedited(void)
+static int membarrier_private_expedited(void)
  {
         int cpu;
         bool fallback = false;
         cpumask_var_t tmpmask;
  
+       if (!(atomic_read(&current->mm->membarrier_state)
+                       & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+               return -EPERM;
+
         if (num_online_cpus() == 1)
-               return;
+               return 0;
  
         /*
          * Matches memory barriers around rq->curr modification in
@@ -94,6 +100,24 @@ static void membarrier_private_expedited(void)
          * rq->curr modification in scheduler.
          */
         smp_mb();       /* exit from system call is not a mb */
+       return 0;
+}
+
+static void membarrier_register_private_expedited(void)
+{      /* Handler for MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED; returns nothing. */
+       struct task_struct *p = current;
+       struct mm_struct *mm = p->mm;
+
+       /*
+        * The ready flag is kept in the mm, so one registration also
+        * covers tasks in other thread groups that share this mm
+        * (CLONE_VM but not CLONE_THREAD).
+        */
+       if (atomic_read(&mm->membarrier_state)
+                       & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
+               return; /* flag already set: skip the atomic_or() */
+       atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+                       &mm->membarrier_state);
+}
  
  /**
@@ -144,7 +168,9 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
                         synchronize_sched();
                 return 0;
         case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-               membarrier_private_expedited();
+               return membarrier_private_expedited();
+       case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
+               membarrier_register_private_expedited();
                 return 0;
         default:
                 return -EINVAL;
author	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
	Thu, 19 Oct 2017 17:30:15 +0000 (13:30 -0400)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 20 Oct 2017 02:13:40 +0000 (22:13 -0400)
fs/exec.c		patch \| blob \| history
include/linux/mm_types.h		patch \| blob \| history
include/linux/sched/mm.h		patch \| blob \| history
include/uapi/linux/membarrier.h		patch \| blob \| history
kernel/sched/membarrier.c		patch \| blob \| history