tools/dma: move dma_map_benchmark from selftests to tools/dma
author: Qinxin Xia <xiaqinxin@huawei.com>
Tue, 28 Oct 2025 12:09:00 +0000 (20:09 +0800)
committer: Marek Szyprowski <m.szyprowski@samsung.com>
Wed, 29 Oct 2025 08:41:40 +0000 (09:41 +0100)
dma_map_benchmark is a standalone developer tool rather than an
automated selftest. It has no pass/fail criteria, expects manual
invocation, and is built as a normal userspace binary. Move it to
tools/dma/ and add a minimal Makefile.

Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Suggested-by: Barry Song <baohua@kernel.org>
Signed-off-by: Qinxin Xia <xiaqinxin@huawei.com>
Acked-by: Barry Song <baohua@kernel.org>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20251028120900.2265511-3-xiaqinxin@huawei.com
include/linux/map_benchmark.h [deleted file]
include/uapi/linux/map_benchmark.h [new file with mode: 0644]
kernel/dma/map_benchmark.c
tools/Makefile
tools/dma/.gitignore [new file with mode: 0644]
tools/dma/Makefile [new file with mode: 0644]
tools/dma/config [new file with mode: 0644]
tools/dma/dma_map_benchmark.c [new file with mode: 0644]
tools/testing/selftests/dma/Makefile [deleted file]
tools/testing/selftests/dma/config [deleted file]
tools/testing/selftests/dma/dma_map_benchmark.c [deleted file]

diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h
deleted file mode 100644 (file)
index 48e2ff9..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2022 HiSilicon Limited.
- */
-
-#ifndef _KERNEL_DMA_BENCHMARK_H
-#define _KERNEL_DMA_BENCHMARK_H
-
-#define DMA_MAP_BENCHMARK       _IOWR('d', 1, struct map_benchmark)
-#define DMA_MAP_MAX_THREADS     1024
-#define DMA_MAP_MAX_SECONDS     300
-#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC)
-
-#define DMA_MAP_BIDIRECTIONAL   0
-#define DMA_MAP_TO_DEVICE       1
-#define DMA_MAP_FROM_DEVICE     2
-
-struct map_benchmark {
-       __u64 avg_map_100ns; /* average map latency in 100ns */
-       __u64 map_stddev; /* standard deviation of map latency */
-       __u64 avg_unmap_100ns; /* as above */
-       __u64 unmap_stddev;
-       __u32 threads; /* how many threads will do map/unmap in parallel */
-       __u32 seconds; /* how long the test will last */
-       __s32 node; /* which numa node this benchmark will run on */
-       __u32 dma_bits; /* DMA addressing capability */
-       __u32 dma_dir; /* DMA data direction */
-       __u32 dma_trans_ns; /* time for DMA transmission in ns */
-       __u32 granule;  /* how many PAGE_SIZE will do map/unmap once a time */
-       __u8 expansion[76]; /* For future use */
-};
-#endif /* _KERNEL_DMA_BENCHMARK_H */
diff --git a/include/uapi/linux/map_benchmark.h b/include/uapi/linux/map_benchmark.h
new file mode 100644 (file)
index 0000000..c2d9108
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2022-2025 HiSilicon Limited.
+ */
+
+#ifndef _UAPI_DMA_BENCHMARK_H
+#define _UAPI_DMA_BENCHMARK_H
+
+#include <linux/types.h>
+
+#define DMA_MAP_BENCHMARK       _IOWR('d', 1, struct map_benchmark) /* ioctl request; struct map_benchmark is both read and written */
+#define DMA_MAP_MAX_THREADS     1024 /* upper bound for map_benchmark.threads */
+#define DMA_MAP_MAX_SECONDS     300 /* upper bound for map_benchmark.seconds */
+#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) /* upper bound for dma_trans_ns; NSEC_PER_MSEC is not defined in this header, the includer must provide it */
+
+#define DMA_MAP_BIDIRECTIONAL   0 /* values for map_benchmark.dma_dir */
+#define DMA_MAP_TO_DEVICE       1
+#define DMA_MAP_FROM_DEVICE     2
+
+struct map_benchmark { /* argument of the DMA_MAP_BENCHMARK ioctl: request parameters and results */
+       __u64 avg_map_100ns; /* average map latency in 100ns */
+       __u64 map_stddev; /* standard deviation of map latency */
+       __u64 avg_unmap_100ns; /* average unmap latency in 100ns */
+       __u64 unmap_stddev; /* standard deviation of unmap latency */
+       __u32 threads; /* how many threads will do map/unmap in parallel */
+       __u32 seconds; /* how long the test will last */
+       __s32 node; /* which numa node this benchmark will run on */
+       __u32 dma_bits; /* DMA addressing capability */
+       __u32 dma_dir; /* DMA data direction, one of the DMA_MAP_* direction values */
+       __u32 dma_trans_ns; /* time for DMA transmission in ns */
+       __u32 granule;  /* how many PAGE_SIZE units are mapped/unmapped at a time */
+       __u8 expansion[76]; /* For future use */
+};
+
+#endif /* _UAPI_DMA_BENCHMARK_H */
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index cc19a3e..794041a 100644 (file)
 #include <linux/dma-mapping.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
-#include <linux/map_benchmark.h>
 #include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/timekeeping.h>
+#include <uapi/linux/map_benchmark.h>
 
 struct map_benchmark_data {
        struct map_benchmark bparam;
diff --git a/tools/Makefile b/tools/Makefile
index c31cbbd..cb40961 100644 (file)
@@ -14,6 +14,7 @@ help:
        @echo '  counter                - counter tools'
        @echo '  cpupower               - a tool for all things x86 CPU power'
        @echo '  debugging              - tools for debugging'
+       @echo '  dma                    - tools for DMA mapping'
        @echo '  firewire               - the userspace part of nosy, an IEEE-1394 traffic sniffer'
        @echo '  firmware               - Firmware tools'
        @echo '  freefall               - laptop accelerometer program for disk protection'
@@ -69,7 +70,7 @@ acpi: FORCE
 cpupower: FORCE
        $(call descend,power/$@)
 
-counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi firmware debugging tracing: FORCE
+counter dma firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi firmware debugging tracing: FORCE
        $(call descend,$@)
 
 bpf/%: FORCE
@@ -122,7 +123,7 @@ kvm_stat: FORCE
 ynl: FORCE
        $(call descend,net/ynl)
 
-all: acpi counter cpupower gpio hv firewire \
+all: acpi counter cpupower dma gpio hv firewire \
                perf selftests bootconfig spi turbostat usb \
                virtio mm bpf x86_energy_perf_policy \
                tmon freefall iio objtool kvm_stat wmi \
@@ -134,7 +135,7 @@ acpi_install:
 cpupower_install:
        $(call descend,power/$(@:_install=),install)
 
-counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install debugging_install tracing_install:
+counter_install dma_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install debugging_install tracing_install:
        $(call descend,$(@:_install=),install)
 
 selftests_install:
@@ -164,7 +165,7 @@ kvm_stat_install:
 ynl_install:
        $(call descend,net/$(@:_install=),install)
 
-install: acpi_install counter_install cpupower_install gpio_install \
+install: acpi_install counter_install cpupower_install dma_install gpio_install \
                hv_install firewire_install iio_install \
                perf_install selftests_install turbostat_install usb_install \
                virtio_install mm_install bpf_install x86_energy_perf_policy_install \
@@ -178,7 +179,7 @@ acpi_clean:
 cpupower_clean:
        $(call descend,power/cpupower,clean)
 
-counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean firmware_clean debugging_clean tracing_clean:
+counter_clean dma_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean firmware_clean debugging_clean tracing_clean:
        $(call descend,$(@:_clean=),clean)
 
 libapi_clean:
@@ -224,7 +225,7 @@ build_clean:
 ynl_clean:
        $(call descend,net/$(@:_clean=),clean)
 
-clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
+clean: acpi_clean counter_clean cpupower_clean dma_clean hv_clean firewire_clean \
                perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
                mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
                freefall_clean build_clean libbpf_clean libsubcmd_clean \
diff --git a/tools/dma/.gitignore b/tools/dma/.gitignore
new file mode 100644 (file)
index 0000000..94b68cf
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+dma_map_benchmark
+include/linux/map_benchmark.h
diff --git a/tools/dma/Makefile b/tools/dma/Makefile
new file mode 100644 (file)
index 0000000..e4abf37
--- /dev/null
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+bindir ?= /usr/bin
+
+# For in-tree builds (building_out_of_srctree undefined) derive srctree
+# from the current directory by stripping its last two path components.
+# This covers builds in the tools env., where srctree isn't set, and
+# builds such as selftests that set srctree to ".".
+ifndef building_out_of_srctree
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := dma_map_benchmark
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+#
+# Link the in-tree UAPI header into $(OUTPUT)include/linux so that
+# <linux/map_benchmark.h> resolves through -I$(OUTPUT)include in CFLAGS.
+$(OUTPUT)include/linux/map_benchmark.h: ../../include/uapi/linux/map_benchmark.h
+       mkdir -p $(OUTPUT)include/linux 2>&1 || true
+       ln -sf $(CURDIR)/../../include/uapi/linux/map_benchmark.h $@
+
+prepare: $(OUTPUT)include/linux/map_benchmark.h
+
+FORCE:
+
+DMA_MAP_BENCHMARK = dma_map_benchmark
+$(DMA_MAP_BENCHMARK): prepare FORCE
+       $(CC) $(CFLAGS) $(DMA_MAP_BENCHMARK).c -o $(DMA_MAP_BENCHMARK)
+
+clean:
+       rm -f $(ALL_PROGRAMS)
+       rm -rf $(OUTPUT)include
+       find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
+
+install: $(ALL_PROGRAMS)
+       install -d -m 755 $(DESTDIR)$(bindir);          \
+       for program in $(ALL_PROGRAMS); do              \
+               install $$program $(DESTDIR)$(bindir);  \
+       done
+
+.PHONY: all install clean prepare FORCE
diff --git a/tools/dma/config b/tools/dma/config
new file mode 100644 (file)
index 0000000..6102ee3
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_DMA_MAP_BENCHMARK=y
diff --git a/tools/dma/dma_map_benchmark.c b/tools/dma/dma_map_benchmark.c
new file mode 100644 (file)
index 0000000..5474a45
--- /dev/null
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 HiSilicon Limited.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/map_benchmark.h>
+
+#define NSEC_PER_MSEC  1000000L /* needed to expand DMA_MAP_MAX_TRANS_DELAY from <linux/map_benchmark.h> */
+
+static char *directions[] = { /* printable names, in the order of the DMA_MAP_* direction values 0..2 */
+       "BIDIRECTIONAL",
+       "TO_DEVICE",
+       "FROM_DEVICE",
+};
+
+int main(int argc, char **argv)
+{ /* Parse the options, run one benchmark through the DMA_MAP_BENCHMARK ioctl and print the latencies. */
+       struct map_benchmark map;
+       int fd, opt;
+       /* default single thread, run 20 seconds on NUMA_NO_NODE */
+       int threads = 1, seconds = 20, node = -1;
+       /* default dma mask 32bit, bidirectional DMA */
+       int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
+       /* default granule 1 PAGESIZE */
+       int granule = 1;
+
+       int cmd = DMA_MAP_BENCHMARK; /* request number passed to ioctl() below */
+
+       while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
+               switch (opt) { /* every option takes an argument that is parsed with atoi() */
+               case 't':
+                       threads = atoi(optarg);
+                       break;
+               case 's':
+                       seconds = atoi(optarg);
+                       break;
+               case 'n':
+                       node = atoi(optarg);
+                       break;
+               case 'b':
+                       bits = atoi(optarg);
+                       break;
+               case 'd':
+                       dir = atoi(optarg);
+                       break;
+               case 'x':
+                       xdelay = atoi(optarg); /* stored in map.dma_trans_ns below */
+                       break;
+               case 'g':
+                       granule = atoi(optarg);
+                       break;
+               default:
+                       return -1; /* any other getopt() result, e.g. an unrecognised option */
+               }
+       }
+
+       if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
+               fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
+                       DMA_MAP_MAX_THREADS);
+               exit(1);
+       }
+
+       if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
+               fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
+                       DMA_MAP_MAX_SECONDS);
+               exit(1);
+       }
+
+       if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
+               fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
+                       DMA_MAP_MAX_TRANS_DELAY);
+               exit(1);
+       }
+
+       /* assume no DMA zone is smaller than 1MB, hence at least 20 mask bits */
+       if (bits < 20 || bits > 64) {
+               fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
+               exit(1);
+       }
+
+       if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
+                       dir != DMA_MAP_FROM_DEVICE) {
+               fprintf(stderr, "invalid dma direction\n");
+               exit(1);
+       }
+
+       if (granule < 1 || granule > 1024) {
+               fprintf(stderr, "invalid granule size\n");
+               exit(1);
+       }
+
+       fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
+       if (fd == -1) {
+               perror("open");
+               exit(1);
+       }
+
+       memset(&map, 0, sizeof(map)); /* start from an all-zero request, including the result fields and expansion[] */
+       map.seconds = seconds;
+       map.threads = threads;
+       map.node = node;
+       map.dma_bits = bits;
+       map.dma_dir = dir;
+       map.dma_trans_ns = xdelay;
+       map.granule = granule;
+
+       if (ioctl(fd, cmd, &map)) { /* any non-zero return is treated as failure; results are read from map below */
+               perror("ioctl");
+               exit(1);
+       }
+
+       printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
+                       threads, seconds, node, dir[directions], granule); /* dir[directions] is equivalent to directions[dir] */
+       printf("average map latency(us):%.1f standard deviation:%.1f\n",
+                       map.avg_map_100ns/10.0, map.map_stddev/10.0); /* the average is in 100ns units; /10.0 yields us */
+       printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
+                       map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
deleted file mode 100644 (file)
index cd8c5ec..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../usr/include/
-CFLAGS += -I../../../../include/
-
-TEST_GEN_PROGS := dma_map_benchmark
-
-include ../lib.mk
diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config
deleted file mode 100644 (file)
index 6102ee3..0000000
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_DMA_MAP_BENCHMARK=y
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
deleted file mode 100644 (file)
index b12f1f9..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2020 HiSilicon Limited.
- */
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <linux/types.h>
-#include <linux/map_benchmark.h>
-
-#define NSEC_PER_MSEC  1000000L
-
-static char *directions[] = {
-       "BIDIRECTIONAL",
-       "TO_DEVICE",
-       "FROM_DEVICE",
-};
-
-int main(int argc, char **argv)
-{
-       struct map_benchmark map;
-       int fd, opt;
-       /* default single thread, run 20 seconds on NUMA_NO_NODE */
-       int threads = 1, seconds = 20, node = -1;
-       /* default dma mask 32bit, bidirectional DMA */
-       int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
-       /* default granule 1 PAGESIZE */
-       int granule = 1;
-
-       int cmd = DMA_MAP_BENCHMARK;
-
-       while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
-               switch (opt) {
-               case 't':
-                       threads = atoi(optarg);
-                       break;
-               case 's':
-                       seconds = atoi(optarg);
-                       break;
-               case 'n':
-                       node = atoi(optarg);
-                       break;
-               case 'b':
-                       bits = atoi(optarg);
-                       break;
-               case 'd':
-                       dir = atoi(optarg);
-                       break;
-               case 'x':
-                       xdelay = atoi(optarg);
-                       break;
-               case 'g':
-                       granule = atoi(optarg);
-                       break;
-               default:
-                       return -1;
-               }
-       }
-
-       if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
-               fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
-                       DMA_MAP_MAX_THREADS);
-               exit(1);
-       }
-
-       if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
-               fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
-                       DMA_MAP_MAX_SECONDS);
-               exit(1);
-       }
-
-       if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
-               fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
-                       DMA_MAP_MAX_TRANS_DELAY);
-               exit(1);
-       }
-
-       /* suppose the mininum DMA zone is 1MB in the world */
-       if (bits < 20 || bits > 64) {
-               fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
-               exit(1);
-       }
-
-       if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
-                       dir != DMA_MAP_FROM_DEVICE) {
-               fprintf(stderr, "invalid dma direction\n");
-               exit(1);
-       }
-
-       if (granule < 1 || granule > 1024) {
-               fprintf(stderr, "invalid granule size\n");
-               exit(1);
-       }
-
-       fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
-       if (fd == -1) {
-               perror("open");
-               exit(1);
-       }
-
-       memset(&map, 0, sizeof(map));
-       map.seconds = seconds;
-       map.threads = threads;
-       map.node = node;
-       map.dma_bits = bits;
-       map.dma_dir = dir;
-       map.dma_trans_ns = xdelay;
-       map.granule = granule;
-
-       if (ioctl(fd, cmd, &map)) {
-               perror("ioctl");
-               exit(1);
-       }
-
-       printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
-                       threads, seconds, node, dir[directions], granule);
-       printf("average map latency(us):%.1f standard deviation:%.1f\n",
-                       map.avg_map_100ns/10.0, map.map_stddev/10.0);
-       printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
-                       map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);
-
-       return 0;
-}