// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

static inline pid_t gettid(void)
{
        return syscall(__NR_gettid);
}

#define NR_INJECT       9
static int loop_cnt[NR_INJECT + 1];
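/*
 * The first six loop_cnt[] entries are mirrored into these globals with
 * fixed asm names so that the per-architecture RSEQ_INJECT_ASM() delay
 * loops can reference them directly by symbol (e.g. "asm_loop_cnt_1")
 * instead of threading them through asm operands. main() copies
 * loop_cnt[n] into loop_cnt_n after option parsing.
 */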
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
                opt_disable_rseq, opt_threads = 200,
                opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...) \
        do { \
                if (verbose) \
                        printf(fmt, ## __VA_ARGS__); \
        } while (0)
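/*
 * The RSEQ_INJECT_* hooks below are consumed by rseq.h when this file is
 * built without BENCHMARK: they inject configurable delay loops (and,
 * through RSEQ_INJECT_C, yield/sleep/signal perturbations) at fixed
 * points inside the rseq critical sections, so that the abort and retry
 * paths actually get exercised.
 */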
#if defined(__x86_64__) || defined(__i386__)

#define INJECT_ASM_REG  "eax"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#ifdef __i386__

#define RSEQ_INJECT_ASM(n) \
        "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
        "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
        "jz 333f\n\t" \
        "222:\n\t" \
        "dec %%" INJECT_ASM_REG "\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"

#elif defined(__x86_64__)

#define RSEQ_INJECT_ASM(n) \
        "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG "\n\t" \
        "mov (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
        "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
        "jz 333f\n\t" \
        "222:\n\t" \
        "dec %%" INJECT_ASM_REG "\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"

#else
#error "Unsupported architecture"
#endif
#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG  "r4"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "cmp " INJECT_ASM_REG ", #0\n\t" \
        "beq 333f\n\t" \
        "222:\n\t" \
        "subs " INJECT_ASM_REG ", #1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"
#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG  "r18"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
        "beq 333f\n\t" \
        "222:\n\t" \
        "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"
#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG  "$5"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "beqz " INJECT_ASM_REG ", 333f\n\t" \
        "222:\n\t" \
        "addiu " INJECT_ASM_REG ", -1\n\t" \
        "bnez " INJECT_ASM_REG ", 222b\n\t" \
        "333:\n\t"

#else
#error unsupported target
#endif
#define RSEQ_INJECT_FAILED \
        nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
        int loc_i, loc_nr_loops = loop_cnt[n]; \
        \
        for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
                rseq_barrier(); \
        } \
        if (loc_nr_loops == -1 && opt_modulo) { \
                if (yield_mod_cnt == opt_modulo - 1) { \
                        if (opt_sleep > 0) \
                                poll(NULL, 0, opt_sleep); \
                        if (opt_yield) \
                                sched_yield(); \
                        if (opt_signal) \
                                raise(SIGUSR1); \
                        yield_mod_cnt = 0; \
                } else \
                        yield_mod_cnt++; \
        } \
}
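/*
 * A loop count of -1 does not spin: it turns the C-level injection point
 * into the modulo-driven yield/sleep/signal perturbation selected by the
 * -m/-y/-s/-k options (see show_usage() below, where -7/-8/-9 document
 * "-1 to enable -m").
 */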
#else /* BENCHMARK */

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"
struct percpu_lock_entry {
        intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
        struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
        intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
        struct percpu_lock lock;
        struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
        struct spinlock_test_data *data;
        long long reps;
        int reg;
};

struct inc_test_data {
        struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
        struct inc_test_data *data;
        long long reps;
        int reg;
};

struct percpu_list_node {
        intptr_t data;
        struct percpu_list_node *next;
};

struct percpu_list_entry {
        struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
        struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU     100

struct percpu_buffer_node {
        intptr_t data;
};

struct percpu_buffer_entry {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
        struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU      100

struct percpu_memcpy_buffer_node {
        intptr_t data1;
        uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
        struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
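/*
 * Note: the per-cpu entries are aligned to 128 bytes so each cpu's data
 * sits on its own cache lines, avoiding false sharing between cpus; the
 * extra slack beyond a typical 64-byte line presumably also keeps
 * adjacent-line prefetchers from pulling in a neighbour's entry.
 */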
/* A simple per-cpu spinlock. Grabs the lock on the current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
        int cpu;

        for (;;) {
                int ret;

                cpu = rseq_cpu_start();
                ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
                                         0, 1, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        /*
         * Acquire semantic when taking lock after control dependency.
         * Matches rseq_smp_store_release().
         */
        rseq_smp_acquire__after_ctrl_dep();
        return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
        assert(lock->c[cpu].v == 1);
        /*
         * Release lock, with release semantic. Matches
         * rseq_smp_acquire__after_ctrl_dep().
         */
        rseq_smp_store_release(&lock->c[cpu].v, 0);
}
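/*
 * Usage sketch (hypothetical caller; my_lock is assumed zero-initialized):
 *
 *      int cpu = rseq_this_cpu_lock(&my_lock);
 *      ...critical section, serialized against other holders on this cpu...
 *      rseq_percpu_unlock(&my_lock, cpu);
 *
 * Note that the cpu returned by rseq_this_cpu_lock() must be passed back
 * to the unlock, since the thread may have migrated after acquiring.
 */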
void *test_percpu_spinlock_thread(void *arg)
{
        struct spinlock_thread_test_data *thread_data = arg;
        struct spinlock_test_data *data = thread_data->data;
        long long i, reps;

        if (!opt_disable_rseq && thread_data->reg &&
            rseq_register_current_thread())
                abort();
        reps = thread_data->reps;
        for (i = 0; i < reps; i++) {
                int cpu;

                cpu = rseq_this_cpu_lock(&data->lock);
                data->c[cpu].count++;
                rseq_percpu_unlock(&data->lock, cpu);

                /* The (reps / 10) guard avoids modulo by zero for reps < 10. */
                if (i != 0 && (reps / 10) && !(i % (reps / 10)))
                        printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
        }
        printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
                       (int) gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && thread_data->reg &&
            rseq_unregister_current_thread())
                abort();
        return NULL;
}
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
        const int num_threads = opt_threads;
        int i, ret;
        uint64_t sum;
        pthread_t test_threads[num_threads];
        struct spinlock_test_data data;
        struct spinlock_thread_test_data thread_data[num_threads];

        memset(&data, 0, sizeof(data));
        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
                thread_data[i].data = &data;
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_spinlock_thread,
                                     &thread_data[i]);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
                sum += data.c[i].count;

        assert(sum == (uint64_t)opt_reps * num_threads);
}
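/*
 * The increment-based variant of the sharded counter: instead of taking
 * a per-cpu lock around the increment, each iteration retries
 * rseq_addv() on the current cpu's counter until the rseq critical
 * section commits without an abort.
 */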
void *test_percpu_inc_thread(void *arg)
{
        struct inc_thread_test_data *thread_data = arg;
        struct inc_test_data *data = thread_data->data;
        long long i, reps;

        if (!opt_disable_rseq && thread_data->reg &&
            rseq_register_current_thread())
                abort();
        reps = thread_data->reps;
        for (i = 0; i < reps; i++) {
                int ret;

                do {
                        int cpu;

                        cpu = rseq_cpu_start();
                        ret = rseq_addv(&data->c[cpu].count, 1, cpu);
                } while (rseq_unlikely(ret));

                /* The (reps / 10) guard avoids modulo by zero for reps < 10. */
                if (i != 0 && (reps / 10) && !(i % (reps / 10)))
                        printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
        }
        printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
                       (int) gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && thread_data->reg &&
            rseq_unregister_current_thread())
                abort();
        return NULL;
}
void test_percpu_inc(void)
{
        const int num_threads = opt_threads;
        int i, ret;
        uint64_t sum;
        pthread_t test_threads[num_threads];
        struct inc_test_data data;
        struct inc_thread_test_data thread_data[num_threads];

        memset(&data, 0, sizeof(data));
        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
                thread_data[i].data = &data;
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_inc_thread,
                                     &thread_data[i]);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
                sum += data.c[i].count;

        assert(sum == (uint64_t)opt_reps * num_threads);
}
void this_cpu_list_push(struct percpu_list *list,
                        struct percpu_list_node *node,
                        int *_cpu)
{
        int cpu;

        for (;;) {
                intptr_t *targetptr, newval, expect;
                int ret;

                cpu = rseq_cpu_start();
                /* Load list->c[cpu].head with single-copy atomicity. */
                expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
                newval = (intptr_t)node;
                targetptr = (intptr_t *)&list->c[cpu].head;
                node->next = (struct percpu_list_node *)expect;
                ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
}
/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
                                           int *_cpu)
{
        struct percpu_list_node *node = NULL;
        int cpu;

        for (;;) {
                struct percpu_list_node *head;
                intptr_t *targetptr, expectnot, *load;
                off_t offset;
                int ret;

                cpu = rseq_cpu_start();
                targetptr = (intptr_t *)&list->c[cpu].head;
                expectnot = (intptr_t)NULL;
                offset = offsetof(struct percpu_list_node, next);
                load = (intptr_t *)&head;
                ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
                                                 offset, load, cpu);
                if (rseq_likely(!ret)) {
                        node = head;
                        break;
                }
                if (ret > 0)
                        break;
                /* Retry if rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return node;
}
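/*
 * Why ABA is a non-issue here: rseq_cmpnev_storeoffp_load() loads the
 * list head, dereferences head->next via the given offset, and stores it
 * back into the head, all inside one rseq critical section on the cpu
 * that owns the shard. Any preemption, migration, or signal aborts the
 * sequence before the commit store, so no other thread can free and
 * recycle the node between the load of head and the update.
 */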
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
        struct percpu_list_node *node;

        node = list->c[cpu].head;
        if (!node)
                return NULL;
        list->c[cpu].head = node->next;
        return node;
}
void *test_percpu_list_thread(void *arg)
{
        long long i, reps;
        struct percpu_list *list = (struct percpu_list *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_list_node *node;

                node = this_cpu_list_pop(list, NULL);
                if (opt_yield)
                        sched_yield(); /* encourage shuffling */
                if (node)
                        this_cpu_list_push(list, node, NULL);
        }

        printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
                       (int) gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}
/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_list list;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&list, 0, sizeof(list));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                for (j = 1; j <= 100; j++) {
                        struct percpu_list_node *node;

                        expected_sum += j;

                        node = malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        node->next = list.c[i].head;
                        list.c[i].head = node;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_list_thread, &list);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_list_node *node;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                while ((node = __percpu_list_pop(&list, i))) {
                        sum += node->data;
                        free(node);
                }
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
                          struct percpu_buffer_node *node,
                          int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                intptr_t *targetptr_spec, newval_spec;
                intptr_t *targetptr_final, newval_final;
                intptr_t offset;
                int ret;

                cpu = rseq_cpu_start();
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == buffer->c[cpu].buflen)
                        break;
                newval_spec = (intptr_t)node;
                targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
                newval_final = offset + 1;
                targetptr_final = &buffer->c[cpu].offset;
                if (opt_mb)
                        ret = rseq_cmpeqv_trystorev_storev_release(
                                targetptr_final, offset, targetptr_spec,
                                newval_spec, newval_final, cpu);
                else
                        ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
                                offset, targetptr_spec, newval_spec,
                                newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}
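/*
 * The *_trystorev_storev() primitives perform two writes inside a single
 * rseq critical section: a speculative store of the node pointer into
 * the array slot, followed by the final store bumping offset, which is
 * what publishes the slot. With -M (opt_mb), the _release variant is
 * used so the slot store is ordered before the publishing store for
 * observers on other cpus.
 */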
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
                                               int *_cpu)
{
        struct percpu_buffer_node *head;
        int cpu;

        for (;;) {
                intptr_t *targetptr, newval;
                intptr_t offset;
                int ret;

                cpu = rseq_cpu_start();
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == 0) {
                        head = NULL;
                        break;
                }
                head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
                newval = offset - 1;
                targetptr = (intptr_t *)&buffer->c[cpu].offset;
                ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
                        (intptr_t *)&buffer->c[cpu].array[offset - 1],
                        (intptr_t)head, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return head;
}
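/*
 * Pop rechecks the array slot in addition to the offset:
 * rseq_cmpeqv_cmpeqv_storev() only commits the decremented offset if
 * both offset and array[offset - 1] still hold the values read above,
 * so the commit is skipped (and the loop retries) if either changed
 * between the loads and the critical section.
 */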
/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
                                               int cpu)
{
        struct percpu_buffer_node *head;
        intptr_t offset;

        offset = buffer->c[cpu].offset;
        if (offset == 0)
                return NULL;
        head = buffer->c[cpu].array[offset - 1];
        buffer->c[cpu].offset = offset - 1;
        return head;
}
void *test_percpu_buffer_thread(void *arg)
{
        long long i, reps;
        struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_buffer_node *node;

                node = this_cpu_buffer_pop(buffer, NULL);
                if (opt_yield)
                        sched_yield(); /* encourage shuffling */
                if (node) {
                        if (!this_cpu_buffer_push(buffer, node, NULL)) {
                                /* Should increase buffer size. */
                                abort();
                        }
                }
        }

        printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
                       (int) gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_buffer buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&buffer, 0, sizeof(buffer));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                /* Worst-case is every item ending up in the same cpu's buffer. */
                buffer.c[i].array =
                        malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
                               BUFFER_ITEM_PER_CPU);
                assert(buffer.c[i].array);
                buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
                        struct percpu_buffer_node *node;

                        expected_sum += j;

                        /*
                         * We could theoretically put the word-sized
                         * "data" directly in the buffer. However, we
                         * want to model objects that would not fit
                         * within a single word, so allocate an object
                         * for each node.
                         */
                        node = malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        buffer.c[i].array[j - 1] = node;
                        buffer.c[i].offset++;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_buffer_thread, &buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_buffer_node *node;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                while ((node = __percpu_buffer_pop(&buffer, i))) {
                        sum += node->data;
                        free(node);
                }
                free(buffer.c[i].array);
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
                                 struct percpu_memcpy_buffer_node item,
                                 int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                intptr_t *targetptr_final, newval_final, offset;
                char *destptr, *srcptr;
                size_t copylen;
                int ret;

                cpu = rseq_cpu_start();
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == buffer->c[cpu].buflen)
                        break;
                destptr = (char *)&buffer->c[cpu].array[offset];
                srcptr = (char *)&item;
                /* copylen must be <= 4kB. */
                copylen = sizeof(item);
                newval_final = offset + 1;
                targetptr_final = &buffer->c[cpu].offset;
                if (opt_mb)
                        ret = rseq_cmpeqv_trymemcpy_storev_release(
                                targetptr_final, offset,
                                destptr, srcptr, copylen,
                                newval_final, cpu);
                else
                        ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
                                offset, destptr, srcptr, copylen,
                                newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
                                struct percpu_memcpy_buffer_node *item,
                                int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                intptr_t *targetptr_final, newval_final, offset;
                char *destptr, *srcptr;
                size_t copylen;
                int ret;

                cpu = rseq_cpu_start();
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
                if (offset == 0)
                        break;
                destptr = (char *)item;
                srcptr = (char *)&buffer->c[cpu].array[offset - 1];
                /* copylen must be <= 4kB. */
                copylen = sizeof(*item);
                newval_final = offset - 1;
                targetptr_final = &buffer->c[cpu].offset;
                ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
                        offset, destptr, srcptr, copylen,
                        newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}
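/*
 * Unlike the pointer-based buffer above, the memcpy variants move the
 * node contents by value: push copies the item into the per-cpu array
 * and pop copies it back out before the offset update commits, so no
 * dynamically allocated node has to outlive the operation.
 */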
/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
                                struct percpu_memcpy_buffer_node *item,
                                int cpu)
{
        intptr_t offset;

        offset = buffer->c[cpu].offset;
        if (offset == 0)
                return false;
        memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
        buffer->c[cpu].offset = offset - 1;
        return true;
}
void *test_percpu_memcpy_buffer_thread(void *arg)
{
        long long i, reps;
        struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_memcpy_buffer_node item;
                bool result;

                result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
                if (opt_yield)
                        sched_yield(); /* encourage shuffling */
                if (result) {
                        if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
                                /* Should increase buffer size. */
                                abort();
                        }
                }
        }

        printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
                       (int) gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_memcpy_buffer buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;

        memset(&buffer, 0, sizeof(buffer));

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;
                /* Worst-case is every item ending up in the same cpu's buffer. */
                buffer.c[i].array =
                        malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
                               MEMCPY_BUFFER_ITEM_PER_CPU);
                assert(buffer.c[i].array);
                buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
                        expected_sum += 2 * j + 1;

                        /*
                         * We could theoretically put the word-sized
                         * "data" directly in the buffer. However, we
                         * want to model objects that would not fit
                         * within a single word, so each node carries
                         * two fields copied by value.
                         */
                        buffer.c[i].array[j - 1].data1 = j;
                        buffer.c[i].array[j - 1].data2 = j + 1;
                        buffer.c[i].offset++;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_memcpy_buffer_thread,
                                     &buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_memcpy_buffer_node item;

                if (!CPU_ISSET(i, &allowed_cpus))
                        continue;

                while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
                        sum += item.data1;
                        sum += item.data2;
                }
                free(buffer.c[i].array);
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
}
static void test_signal_interrupt_handler(int signo)
{
        signals_delivered++;
}

static int set_signal_handler(void)
{
        int ret = 0;
        struct sigaction sa;
        sigset_t sigset;

        ret = sigemptyset(&sigset);
        if (ret < 0) {
                perror("sigemptyset");
                return ret;
        }

        sa.sa_handler = test_signal_interrupt_handler;
        sa.sa_mask = sigset;
        sa.sa_flags = 0;
        ret = sigaction(SIGUSR1, &sa, NULL);
        if (ret < 0) {
                perror("sigaction");
                return ret;
        }

        printf_verbose("Signal handler set for SIGUSR1\n");

        return ret;
}
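/*
 * SIGUSR1 is raised from inside the injected critical-section code when
 * -k is selected: each delivery increments signals_delivered here, and
 * interrupting an rseq critical section forces it to abort and retry,
 * which is exactly the path this test wants to stress.
 */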
static void show_usage(int argc, char **argv)
{
        printf("Usage: %s <OPTIONS>\n",
               argv[0]);
        printf("OPTIONS:\n");
        printf("        [-1 loops] Number of loops for delay injection 1\n");
        printf("        [-2 loops] Number of loops for delay injection 2\n");
        printf("        [-3 loops] Number of loops for delay injection 3\n");
        printf("        [-4 loops] Number of loops for delay injection 4\n");
        printf("        [-5 loops] Number of loops for delay injection 5\n");
        printf("        [-6 loops] Number of loops for delay injection 6\n");
        printf("        [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
        printf("        [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
        printf("        [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
        printf("        [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
        printf("        [-y] Yield\n");
        printf("        [-k] Kill thread with signal\n");
        printf("        [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
        printf("        [-t N] Number of threads (default 200)\n");
        printf("        [-r N] Number of repetitions per thread (default 5000)\n");
        printf("        [-d] Disable rseq system call (no initialization)\n");
        printf("        [-D M] Disable rseq for each M threads\n");
        printf("        [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
        printf("        [-M] Push into buffer and memcpy buffer with memory barriers.\n");
        printf("        [-v] Verbose output.\n");
        printf("        [-h] Show this help.\n");
        printf("\n");
}
int main(int argc, char **argv)
{
        int i;

        for (i = 1; i < argc; i++) {
                if (argv[i][0] != '-')
                        continue;
                switch (argv[i][1]) {
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
                        i++;
                        break;
                case 'm':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_modulo = atol(argv[i + 1]);
                        if (opt_modulo < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 's':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_sleep = atol(argv[i + 1]);
                        if (opt_sleep < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'y':
                        opt_yield = 1;
                        break;
                case 'k':
                        opt_signal = 1;
                        break;
                case 'd':
                        opt_disable_rseq = 1;
                        break;
                case 'D':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_disable_mod = atol(argv[i + 1]);
                        if (opt_disable_mod < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 't':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_threads = atol(argv[i + 1]);
                        if (opt_threads < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'r':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_reps = atoll(argv[i + 1]);
                        if (opt_reps < 0) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'h':
                        show_usage(argc, argv);
                        goto end;
                case 'T':
                        if (argc < i + 2) {
                                show_usage(argc, argv);
                                goto error;
                        }
                        opt_test = *argv[i + 1];
                        switch (opt_test) {
                        case 's':
                        case 'l':
                        case 'i':
                        case 'b':
                        case 'm':
                                break;
                        default:
                                show_usage(argc, argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'v':
                        verbose = 1;
                        break;
                case 'M':
                        opt_mb = 1;
                        break;
                default:
                        show_usage(argc, argv);
                        goto error;
                }
        }

        loop_cnt_1 = loop_cnt[1];
        loop_cnt_2 = loop_cnt[2];
        loop_cnt_3 = loop_cnt[3];
        loop_cnt_4 = loop_cnt[4];
        loop_cnt_5 = loop_cnt[5];
        loop_cnt_6 = loop_cnt[6];

        if (set_signal_handler())
                goto error;

        if (!opt_disable_rseq && rseq_register_current_thread())
                goto error;
        switch (opt_test) {
        case 's':
                printf_verbose("spinlock\n");
                test_percpu_spinlock();
                break;
        case 'l':
                printf_verbose("linked list\n");
                test_percpu_list();
                break;
        case 'b':
                printf_verbose("buffer\n");
                test_percpu_buffer();
                break;
        case 'm':
                printf_verbose("memcpy buffer\n");
                test_percpu_memcpy_buffer();
                break;
        case 'i':
                printf_verbose("counter increment\n");
                test_percpu_inc();
                break;
        }
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();
end:
        return 0;

error:
        return -1;
}
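/*
 * Example invocations (binary name hypothetical, adjust to your build):
 *
 *      ./param_test -T s -t 16 -r 5000 -v      # spinlock test, 16 threads
 *      ./param_test -T l -7 -1 -m 10 -y        # list test, yield every 10 ops
 */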