perf machine: Use hashtable for machine threads
authorKan Liang <kan.liang@intel.com>
Mon, 11 Sep 2017 02:23:14 +0000 (19:23 -0700)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Mon, 18 Sep 2017 12:40:19 +0000 (09:40 -0300)
To process any events, it needs to find the thread in the machine first.
The machine maintains a rb tree to store all threads. The rb tree is
protected by a rw lock.

It is not a problem for current perf which serially processing events.
However, it will have scalability performance issue to process events in
parallel, especially on a heavy load system which have many threads.

Introduce a hashtable to divide the big rb tree into many samll rb tree
for threads. The index is thread id % hashtable size. It can reduce the
lock contention.

Committer notes:

Renamed some variables and function names to reduce semantic confusion:

  'struct threads' pointers: thread -> threads
  threads hastable index: tid -> hash_bucket
  struct threads *machine__thread() -> machine__threads()
  Cast tid to (unsigned int) to handle -1 in machine__threads() (Kan Liang)

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Lukasz Odzioba <lukasz.odzioba@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1505096603-215017-2-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-trace.c
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/rb_resort.h

index 771ddab..ee8c6e8 100644 (file)
@@ -2730,20 +2730,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
-       DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
        size_t printed = trace__fprintf_threads_header(fp);
        struct rb_node *nd;
+       int i;
 
-       if (threads == NULL) {
-               fprintf(fp, "%s", "Error sorting output by nr_events!\n");
-               return 0;
-       }
+       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+               DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
 
-       resort_rb__for_each_entry(nd, threads)
-               printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+               if (threads == NULL) {
+                       fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+                       return 0;
+               }
 
-       resort_rb__delete(threads);
+               resort_rb__for_each_entry(nd, threads)
+                       printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
 
+               resort_rb__delete(threads);
+       }
        return printed;
 }
 
index df70936..f4f9267 100644 (file)
@@ -33,6 +33,20 @@ static void dsos__init(struct dsos *dsos)
        pthread_rwlock_init(&dsos->lock, NULL);
 }
 
+static void machine__threads_init(struct machine *machine)
+{
+       int i;
+
+       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads *threads = &machine->threads[i];
+               threads->entries = RB_ROOT;
+               pthread_rwlock_init(&threads->lock, NULL);
+               threads->nr = 0;
+               INIT_LIST_HEAD(&threads->dead);
+               threads->last_match = NULL;
+       }
+}
+
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
        memset(machine, 0, sizeof(*machine));
@@ -40,11 +54,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
        RB_CLEAR_NODE(&machine->rb_node);
        dsos__init(&machine->dsos);
 
-       machine->threads = RB_ROOT;
-       pthread_rwlock_init(&machine->threads_lock, NULL);
-       machine->nr_threads = 0;
-       INIT_LIST_HEAD(&machine->dead_threads);
-       machine->last_match = NULL;
+       machine__threads_init(machine);
 
        machine->vdso_info = NULL;
        machine->env = NULL;
@@ -141,27 +151,37 @@ static void dsos__exit(struct dsos *dsos)
 void machine__delete_threads(struct machine *machine)
 {
        struct rb_node *nd;
+       int i;
 
-       pthread_rwlock_wrlock(&machine->threads_lock);
-       nd = rb_first(&machine->threads);
-       while (nd) {
-               struct thread *t = rb_entry(nd, struct thread, rb_node);
+       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads *threads = &machine->threads[i];
+               pthread_rwlock_wrlock(&threads->lock);
+               nd = rb_first(&threads->entries);
+               while (nd) {
+                       struct thread *t = rb_entry(nd, struct thread, rb_node);
 
-               nd = rb_next(nd);
-               __machine__remove_thread(machine, t, false);
+                       nd = rb_next(nd);
+                       __machine__remove_thread(machine, t, false);
+               }
+               pthread_rwlock_unlock(&threads->lock);
        }
-       pthread_rwlock_unlock(&machine->threads_lock);
 }
 
 void machine__exit(struct machine *machine)
 {
+       int i;
+
        machine__destroy_kernel_maps(machine);
        map_groups__exit(&machine->kmaps);
        dsos__exit(&machine->dsos);
        machine__exit_vdso(machine);
        zfree(&machine->root_dir);
        zfree(&machine->current_tid);
-       pthread_rwlock_destroy(&machine->threads_lock);
+
+       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads *threads = &machine->threads[i];
+               pthread_rwlock_destroy(&threads->lock);
+       }
 }
 
 void machine__delete(struct machine *machine)
@@ -382,7 +402,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
                                                  pid_t pid, pid_t tid,
                                                  bool create)
 {
-       struct rb_node **p = &machine->threads.rb_node;
+       struct threads *threads = machine__threads(machine, tid);
+       struct rb_node **p = &threads->entries.rb_node;
        struct rb_node *parent = NULL;
        struct thread *th;
 
@@ -391,14 +412,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
         * so most of the time we dont have to look up
         * the full rbtree:
         */
-       th = machine->last_match;
+       th = threads->last_match;
        if (th != NULL) {
                if (th->tid == tid) {
                        machine__update_thread_pid(machine, th, pid);
                        return thread__get(th);
                }
 
-               machine->last_match = NULL;
+               threads->last_match = NULL;
        }
 
        while (*p != NULL) {
@@ -406,7 +427,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
                th = rb_entry(parent, struct thread, rb_node);
 
                if (th->tid == tid) {
-                       machine->last_match = th;
+                       threads->last_match = th;
                        machine__update_thread_pid(machine, th, pid);
                        return thread__get(th);
                }
@@ -423,7 +444,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
        th = thread__new(pid, tid);
        if (th != NULL) {
                rb_link_node(&th->rb_node, parent, p);
-               rb_insert_color(&th->rb_node, &machine->threads);
+               rb_insert_color(&th->rb_node, &threads->entries);
 
                /*
                 * We have to initialize map_groups separately
@@ -434,7 +455,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
                 * leader and that would screwed the rb tree.
                 */
                if (thread__init_map_groups(th, machine)) {
-                       rb_erase_init(&th->rb_node, &machine->threads);
+                       rb_erase_init(&th->rb_node, &threads->entries);
                        RB_CLEAR_NODE(&th->rb_node);
                        thread__put(th);
                        return NULL;
@@ -443,8 +464,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
                 * It is now in the rbtree, get a ref
                 */
                thread__get(th);
-               machine->last_match = th;
-               ++machine->nr_threads;
+               threads->last_match = th;
+               ++threads->nr;
        }
 
        return th;
@@ -458,21 +479,24 @@ struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid
 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
                                       pid_t tid)
 {
+       struct threads *threads = machine__threads(machine, tid);
        struct thread *th;
 
-       pthread_rwlock_wrlock(&machine->threads_lock);
+       pthread_rwlock_wrlock(&threads->lock);
        th = __machine__findnew_thread(machine, pid, tid);
-       pthread_rwlock_unlock(&machine->threads_lock);
+       pthread_rwlock_unlock(&threads->lock);
        return th;
 }
 
 struct thread *machine__find_thread(struct machine *machine, pid_t pid,
                                    pid_t tid)
 {
+       struct threads *threads = machine__threads(machine, tid);
        struct thread *th;
-       pthread_rwlock_rdlock(&machine->threads_lock);
+
+       pthread_rwlock_rdlock(&threads->lock);
        th =  ____machine__findnew_thread(machine, pid, tid, false);
-       pthread_rwlock_unlock(&machine->threads_lock);
+       pthread_rwlock_unlock(&threads->lock);
        return th;
 }
 
@@ -719,21 +743,24 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
 
 size_t machine__fprintf(struct machine *machine, FILE *fp)
 {
-       size_t ret;
        struct rb_node *nd;
+       size_t ret;
+       int i;
 
-       pthread_rwlock_rdlock(&machine->threads_lock);
-
-       ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+               struct threads *threads = &machine->threads[i];
+               pthread_rwlock_rdlock(&threads->lock);
 
-       for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
-               struct thread *pos = rb_entry(nd, struct thread, rb_node);
+               ret = fprintf(fp, "Threads: %u\n", threads->nr);
 
-               ret += thread__fprintf(pos, fp);
-       }
+               for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+                       struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
-       pthread_rwlock_unlock(&machine->threads_lock);
+                       ret += thread__fprintf(pos, fp);
+               }
 
+               pthread_rwlock_unlock(&threads->lock);
+       }
        return ret;
 }
 
@@ -1479,23 +1506,25 @@ out_problem:
 
 static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
 {
-       if (machine->last_match == th)
-               machine->last_match = NULL;
+       struct threads *threads = machine__threads(machine, th->tid);
+
+       if (threads->last_match == th)
+               threads->last_match = NULL;
 
        BUG_ON(refcount_read(&th->refcnt) == 0);
        if (lock)
-               pthread_rwlock_wrlock(&machine->threads_lock);
-       rb_erase_init(&th->rb_node, &machine->threads);
+               pthread_rwlock_wrlock(&threads->lock);
+       rb_erase_init(&th->rb_node, &threads->entries);
        RB_CLEAR_NODE(&th->rb_node);
-       --machine->nr_threads;
+       --threads->nr;
        /*
         * Move it first to the dead_threads list, then drop the reference,
         * if this is the last reference, then the thread__delete destructor
         * will be called and we will remove it from the dead_threads list.
         */
-       list_add_tail(&th->node, &machine->dead_threads);
+       list_add_tail(&th->node, &threads->dead);
        if (lock)
-               pthread_rwlock_unlock(&machine->threads_lock);
+               pthread_rwlock_unlock(&threads->lock);
        thread__put(th);
 }
 
@@ -2140,21 +2169,26 @@ int machine__for_each_thread(struct machine *machine,
                             int (*fn)(struct thread *thread, void *p),
                             void *priv)
 {
+       struct threads *threads;
        struct rb_node *nd;
        struct thread *thread;
        int rc = 0;
+       int i;
 
-       for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
-               thread = rb_entry(nd, struct thread, rb_node);
-               rc = fn(thread, priv);
-               if (rc != 0)
-                       return rc;
-       }
+       for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+               threads = &machine->threads[i];
+               for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+                       thread = rb_entry(nd, struct thread, rb_node);
+                       rc = fn(thread, priv);
+                       if (rc != 0)
+                               return rc;
+               }
 
-       list_for_each_entry(thread, &machine->dead_threads, node) {
-               rc = fn(thread, priv);
-               if (rc != 0)
-                       return rc;
+               list_for_each_entry(thread, &threads->dead, node) {
+                       rc = fn(thread, priv);
+                       if (rc != 0)
+                               return rc;
+               }
        }
        return rc;
 }
index 3cdb134..fe2f058 100644 (file)
@@ -23,6 +23,17 @@ extern const char *ref_reloc_sym_names[];
 
 struct vdso_info;
 
+#define THREADS__TABLE_BITS    8
+#define THREADS__TABLE_SIZE    (1 << THREADS__TABLE_BITS)
+
+struct threads {
+       struct rb_root    entries;
+       pthread_rwlock_t  lock;
+       unsigned int      nr;
+       struct list_head  dead;
+       struct thread     *last_match;
+};
+
 struct machine {
        struct rb_node    rb_node;
        pid_t             pid;
@@ -30,11 +41,7 @@ struct machine {
        bool              comm_exec;
        bool              kptr_restrict_warned;
        char              *root_dir;
-       struct rb_root    threads;
-       pthread_rwlock_t  threads_lock;
-       unsigned int      nr_threads;
-       struct list_head  dead_threads;
-       struct thread     *last_match;
+       struct threads    threads[THREADS__TABLE_SIZE];
        struct vdso_info  *vdso_info;
        struct perf_env   *env;
        struct dsos       dsos;
@@ -48,6 +55,12 @@ struct machine {
        };
 };
 
+static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
+{
+       /* Cast it to handle tid == -1 */
+       return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
 static inline
 struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
 {
index 808cc45..b30746f 100644 (file)
@@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new
                                  __ilist->rblist.nr_entries)
 
 /* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine)                   \
-       DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)      \
+       DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries,     \
+                                 __machine->threads[hash_bucket].nr)
 
 #endif /* _PERF_RESORT_RB_H_ */