From: James Clark Date: Thu, 26 Nov 2020 14:13:25 +0000 (+0200) Subject: perf stat aggregation: Add separate socket member X-Git-Tag: microblaze-v5.12~15^2~4 X-Git-Url: http://git.monstr.eu/?p=linux-2.6-microblaze.git;a=commitdiff_plain;h=1a270cb6b3cc18663f7fd165aa691c48d68739f2 perf stat aggregation: Add separate socket member Add socket as a separate member so that it doesn't have to be packed into the int value. When the socket ID was larger than 8 bits the output appeared corrupted or incomplete. For example, here on ThunderX2 'perf stat' reports a socket of -1 and an invalid die number: ./perf stat -a --per-die The socket id number is too big. Performance counter stats for 'system wide': S-1-D255 128 687.99 msec cpu-clock # 57.240 CPUs utilized ... S36-D0 128 842.34 msec cpu-clock # 70.081 CPUs utilized ... And with --per-core there is an entry with an invalid core ID: ./perf stat record -a --per-core The socket id number is too big. Performance counter stats for 'system wide': S-1-D255-C65535 128 671.04 msec cpu-clock # 54.112 CPUs utilized ... S36-D0-C0 4 28.27 msec cpu-clock # 2.279 CPUs utilized ... This fixes the "Session topology" self test on ThunderX2. After this fix the output contains the correct socket and die IDs and no longer prints a warning about the size of the socket ID: ./perf stat --per-die -a Performance counter stats for 'system wide': S36-D0 128 169,869.39 msec cpu-clock # 127.501 CPUs utilized ... S3612-D0 128 169,733.05 msec cpu-clock # 127.398 CPUs utilized Signed-off-by: James Clark Acked-by: Namhyung Kim Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: John Garry Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Richter Link: https://lore.kernel.org/r/20201126141328.6509-10-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e51c5469d712..6248baa0f612 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1369,7 +1369,7 @@ static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (cpu != -1) - id.id = env->cpu[cpu].socket_id; + id.socket = env->cpu[cpu].socket_id; return id; } @@ -1382,18 +1382,16 @@ static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, v if (cpu != -1) { /* - * Encode socket in bit range 15:8 - * die_id is relative to socket, - * we need a global id. So we combine - * socket + die id + * die_id is relative to socket, so start + * with the socket ID and then add die to + * make a unique ID. */ - if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); + id.socket = env->cpu[cpu].socket_id; if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff); + id.id = env->cpu[cpu].die_id & 0xff; } return id; @@ -1407,23 +1405,19 @@ static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, if (cpu != -1) { /* - * Encode socket in bit range 31:24 * encode die id in bit range 23:16 * core_id is relative to socket and die, * we need a global id. So we combine * socket + die id + core id */ - if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); - if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (env->cpu[cpu].socket_id << 24) | - (env->cpu[cpu].die_id << 16) | + id.socket = env->cpu[cpu].socket_id; + id.id = (env->cpu[cpu].die_id << 16) | (env->cpu[cpu].core_id & 0xffff); } diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index ee272d45a445..777dd8291bcc 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -114,8 +114,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) session->header.env.cpu[map->map[i]].core_id == cpu_map__id_to_cpu(id.id)); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id.id)); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == cpu_map__id_to_die(id.id)); @@ -126,8 +125,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_die(map, i, NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == - cpu_map__id_to_socket(id.id << 16)); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", session->header.env.cpu[map->map[i]].die_id == @@ -140,9 +138,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) for (i = 0; i < map->nr; i++) { id = cpu_map__get_socket(map, i, NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.id); + session->header.env.cpu[map->map[i]].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Socket map - ID is set", id.id == -1); } // Test that node ID contains only node @@ -151,6 +150,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Node map - Node ID doesn't match", cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - ID is set", id.id == -1); + TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); } perf_session__delete(session); diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 5f9e98ddbe34..d2630f03f682 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -139,7 +139,7 @@ struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, cpu = map->map[idx]; - id.id = cpu_map__get_socket_id(cpu); + id.socket = cpu_map__get_socket_id(cpu); return id; } @@ -150,8 +150,10 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) if (a->id != b->id) return a->id - b->id; - else + else if (a->node != b->node) return a->node - b->node; + else + return a->socket - b->socket; } int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, @@ -196,7 +198,7 @@ int cpu_map__get_die_id(int cpu) struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) { - int cpu, s; + int cpu, die; struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); if (idx > map->nr) @@ -204,28 +206,24 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat cpu = map->map[idx]; - id.id = cpu_map__get_die_id(cpu); + die = cpu_map__get_die_id(cpu); /* There is no die_id on legacy system. */ - if (id.id == -1) - id.id = 0; - - s = cpu_map__get_socket(map, idx, data).id; - if (s == -1) - return cpu_map__empty_aggr_cpu_id(); + if (die == -1) + die = 0; /* - * Encode socket in bit range 15:8 - * die_id is relative to socket, and - * we need a global id. So we combine - * socket + die id + * die_id is relative to socket, so start + * with the socket ID and then add die to + * make a unique ID. */ - if (WARN_ONCE(id.id >> 8, "The die id number is too big.\n")) - return cpu_map__empty_aggr_cpu_id(); + id = cpu_map__get_socket(map, idx, data); + if (cpu_map__aggr_cpu_id_is_empty(id)) + return id; - if (WARN_ONCE(s >> 8, "The socket id number is too big.\n")) + if (WARN_ONCE(die >> 8, "The die id number is too big.\n")) return cpu_map__empty_aggr_cpu_id(); - id.id = (s << 8) | (id.id & 0xff); + id.id = (die & 0xff); return id; } @@ -258,7 +256,6 @@ struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *da return id; /* - * encode socket in bit range 31:24 * encode die id in bit range 23:16 * core_id is relative to socket and die, * we need a global id. So we combine @@ -624,20 +621,23 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) { return a.id == b.id && - a.node == b.node; + a.node == b.node && + a.socket == b.socket; } bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) { return a.id == -1 && - a.node == -1; + a.node == -1 && + a.socket == -1; } struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) { struct aggr_cpu_id ret = { .id = -1, - .node = -1 + .node = -1, + .socket = -1 }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index f79e92603024..0123ecc90694 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -10,6 +10,7 @@ struct aggr_cpu_id { int id; int node; + int socket; }; struct cpu_aggr_map { @@ -48,11 +49,6 @@ static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) return sock->map[s]; } -static inline int cpu_map__id_to_socket(int id) -{ - return id >> 24; -} - static inline int cpu_map__id_to_die(int id) { return (id >> 16) & 0xff; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 790392247026..5a756c88c124 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -73,7 +73,7 @@ static void aggr_printout(struct perf_stat_config *config, switch (config->aggr_mode) { case AGGR_CORE: fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - cpu_map__id_to_socket(id.id), + id.socket, cpu_map__id_to_die(id.id), config->csv_output ? 0 : -8, cpu_map__id_to_cpu(id.id), @@ -84,7 +84,7 @@ static void aggr_printout(struct perf_stat_config *config, break; case AGGR_DIE: fprintf(config->output, "S%d-D%*d%s%*d%s", - cpu_map__id_to_socket(id.id << 16), + id.socket, config->csv_output ? 0 : -8, cpu_map__id_to_die(id.id << 16), config->csv_sep, @@ -95,7 +95,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_SOCKET: fprintf(config->output, "S%*d%s%*d%s", config->csv_output ? 0 : -5, - id.id, + id.socket, config->csv_sep, config->csv_output ? 0 : 4, nr, @@ -113,7 +113,7 @@ static void aggr_printout(struct perf_stat_config *config, case AGGR_NONE: if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", - cpu_map__id_to_socket(id.id), + id.socket, cpu_map__id_to_die(id.id), config->csv_output ? 0 : -3, cpu_map__id_to_cpu(id.id), config->csv_sep); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1d5c9f98396b..8ce1479c98f0 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -313,7 +313,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL).id; + s = cpu_map__get_socket(cpus, cpu, NULL).socket; if (s < 0) return -1;