tools/testing/selftests/kvm/memslot_perf_test.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * A memslot-related performance benchmark.
   4  *
   5  * Copyright (C) 2021 Oracle and/or its affiliates.
   6  *
   7  * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
   8  */
   9 #include <pthread.h>
  10 #include <sched.h>
  11 #include <semaphore.h>
  12 #include <stdatomic.h>
  13 #include <stdbool.h>
  14 #include <stdint.h>
  15 #include <stdio.h>
  16 #include <stdlib.h>
  17 #include <string.h>
  18 #include <sys/mman.h>
  19 #include <time.h>
  20 #include <unistd.h>
  21
  22 #include <linux/compiler.h>
  23
  24 #include <test_util.h>
  25 #include <kvm_util.h>
  26 #include <processor.h>
  27
  28 #define VCPU_ID 0
  29
  30 #define MEM_SIZE                ((512U << 20) + 4096)
  31 #define MEM_SIZE_PAGES          (MEM_SIZE / 4096)
  32 #define MEM_GPA         0x10000000UL
  33 #define MEM_AUX_GPA             MEM_GPA
  34 #define MEM_SYNC_GPA            MEM_AUX_GPA
  35 #define MEM_TEST_GPA            (MEM_AUX_GPA + 4096)
  36 #define MEM_TEST_SIZE           (MEM_SIZE - 4096)
  37 static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
  38 static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
  39
  40 /*
  41  * 32 MiB is max size that gets well over 100 iterations on 509 slots.
  42  * Considering that each slot needs to have at least one page up to
  43  * 8194 slots in use can then be tested (although with slightly
  44  * limited resolution).
  45  */
  46 #define MEM_SIZE_MAP            ((32U << 20) + 4096)
  47 #define MEM_SIZE_MAP_PAGES      (MEM_SIZE_MAP / 4096)
  48 #define MEM_TEST_MAP_SIZE       (MEM_SIZE_MAP - 4096)
  49 #define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
  50 static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
  51 static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
  52 static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
  53 static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");
  54
  55 /*
  56  * 128 MiB is min size that fills 32k slots with at least one page in each
  57  * while at the same time gets 100+ iterations in such test
  58  */
  59 #define MEM_TEST_UNMAP_SIZE             (128U << 20)
  60 #define MEM_TEST_UNMAP_SIZE_PAGES       (MEM_TEST_UNMAP_SIZE / 4096)
  61 /* 2 MiB chunk size like a typical huge page */
  62 #define MEM_TEST_UNMAP_CHUNK_PAGES      (2U << (20 - 12))
  63 static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
  64               "invalid unmap test region size");
  65 static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
  66               "invalid unmap test region size");
  67 static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
  68               (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
  69               "invalid unmap test region size");
  70
  71 /*
  72  * For the move active test the middle of the test area is placed on
  73  * a memslot boundary: half lies in the memslot being moved, half in
  74  * other memslot(s).
  75  *
  76  * When running this test with 32k memslots (32764, really) each memslot
  77  * contains 4 pages.
  78  * The last one additionally contains the remaining 21 pages of memory,
  79  * for the total size of 25 pages.
  80  * Hence, the maximum size here is 50 pages.
  81  */
  82 #define MEM_TEST_MOVE_SIZE_PAGES        (50)
  83 #define MEM_TEST_MOVE_SIZE              (MEM_TEST_MOVE_SIZE_PAGES * 4096)
  84 #define MEM_TEST_MOVE_GPA_DEST          (MEM_GPA + MEM_SIZE)
  85 static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
  86               "invalid move test region size");
  87
  88 #define MEM_TEST_VAL_1 0x1122334455667788
  89 #define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
  90
  91 struct vm_data {
  92         struct kvm_vm *vm;
  93         pthread_t vcpu_thread;
  94         uint32_t nslots;
  95         uint64_t npages;
  96         uint64_t pages_per_slot;
  97         void **hva_slots;
  98         bool mmio_ok;
  99         uint64_t mmio_gpa_min;
 100         uint64_t mmio_gpa_max;
 101 };
 102
 103 struct sync_area {
 104         atomic_bool start_flag;
 105         atomic_bool exit_flag;
 106         atomic_bool sync_flag;
 107         void *move_area_ptr;
 108 };
 109
 110 /*
 111  * Technically, we need also for the atomic bool to be address-free, which
 112  * is recommended, but not strictly required, by C11 for lockless
 113  * implementations.
 114  * However, in practice both GCC and Clang fulfill this requirement on
 115  * all KVM-supported platforms.
 116  */
 117 static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
 118
 119 static sem_t vcpu_ready;
 120
 121 static bool map_unmap_verify;
 122
 123 static bool verbose;
 124 #define pr_info_v(...)                          \
 125         do {                                    \
 126                 if (verbose)                    \
 127                         pr_info(__VA_ARGS__);   \
 128         } while (0)
 129
 130 static void *vcpu_worker(void *data)
 131 {
 132         struct vm_data *vm = data;
 133         struct kvm_run *run;
 134         struct ucall uc;
 135         uint64_t cmd;
 136
 137         run = vcpu_state(vm->vm, VCPU_ID);
 138         while (1) {
 139                 vcpu_run(vm->vm, VCPU_ID);
 140
 141                 if (run->exit_reason == KVM_EXIT_IO) {
 142                         cmd = get_ucall(vm->vm, VCPU_ID, &uc);
 143                         if (cmd != UCALL_SYNC)
 144                                 break;
 145
 146                         sem_post(&vcpu_ready);
 147                         continue;
 148                 }
 149
 150                 if (run->exit_reason != KVM_EXIT_MMIO)
 151                         break;
 152
 153                 TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
 154                 TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
 155                 TEST_ASSERT(run->mmio.len == 8,
 156                             "Unexpected exit mmio size = %u", run->mmio.len);
 157                 TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
 158                             run->mmio.phys_addr <= vm->mmio_gpa_max,
 159                             "Unexpected exit mmio address = 0x%llx",
 160                             run->mmio.phys_addr);
 161         }
 162
 163         if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
 164                 TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
 165                           __FILE__, uc.args[1], uc.args[2]);
 166
 167         return NULL;
 168 }
 169
 170 static void wait_for_vcpu(void)
 171 {
 172         struct timespec ts;
 173
 174         TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
 175                     "clock_gettime() failed: %d\n", errno);
 176
 177         ts.tv_sec += 2;
 178         TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
 179                     "sem_timedwait() failed: %d\n", errno);
 180 }
 181
 182 static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
 183 {
 184         uint64_t gpage, pgoffs;
 185         uint32_t slot, slotoffs;
 186         void *base;
 187
 188         TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
 189         TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
 190                     "Too high gpa to translate");
 191         gpa -= MEM_GPA;
 192
 193         gpage = gpa / 4096;
 194         pgoffs = gpa % 4096;
 195         slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
 196         slotoffs = gpage - (slot * data->pages_per_slot);
 197
 198         if (rempages) {
 199                 uint64_t slotpages;
 200
 201                 if (slot == data->nslots - 1)
 202                         slotpages = data->npages - slot * data->pages_per_slot;
 203                 else
 204                         slotpages = data->pages_per_slot;
 205
 206                 TEST_ASSERT(!pgoffs,
 207                             "Asking for remaining pages in slot but gpa not page aligned");
 208                 *rempages = slotpages - slotoffs;
 209         }
 210
 211         base = data->hva_slots[slot];
 212         return (uint8_t *)base + slotoffs * 4096 + pgoffs;
 213 }
 214
 215 static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
 216 {
 217         TEST_ASSERT(slot < data->nslots, "Too high slot number");
 218
 219         return MEM_GPA + slot * data->pages_per_slot * 4096;
 220 }
 221
 222 static struct vm_data *alloc_vm(void)
 223 {
 224         struct vm_data *data;
 225
 226         data = malloc(sizeof(*data));
 227         TEST_ASSERT(data, "malloc(vmdata) failed");
 228
 229         data->vm = NULL;
 230         data->hva_slots = NULL;
 231
 232         return data;
 233 }
 234
 235 static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 236                        void *guest_code, uint64_t mempages,
 237                        struct timespec *slot_runtime)
 238 {
 239         uint32_t max_mem_slots;
 240         uint64_t rempages;
 241         uint64_t guest_addr;
 242         uint32_t slot;
 243         struct timespec tstart;
 244         struct sync_area *sync;
 245
 246         max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
 247         TEST_ASSERT(max_mem_slots > 1,
 248                     "KVM_CAP_NR_MEMSLOTS should be greater than 1");
 249         TEST_ASSERT(nslots > 1 || nslots == -1,
 250                     "Slot count cap should be greater than 1");
 251         if (nslots != -1)
 252                 max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
 253         pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);
 254
 255         TEST_ASSERT(mempages > 1,
 256                     "Can't test without any memory");
 257
 258         data->npages = mempages;
 259         data->nslots = max_mem_slots - 1;
 260         data->pages_per_slot = mempages / data->nslots;
 261         if (!data->pages_per_slot) {
 262                 *maxslots = mempages + 1;
 263                 return false;
 264         }
 265
 266         rempages = mempages % data->nslots;
 267         data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
 268         TEST_ASSERT(data->hva_slots, "malloc() fail");
 269
 270         data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
 271
 272         pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
 273                 max_mem_slots - 1, data->pages_per_slot, rempages);
 274
 275         clock_gettime(CLOCK_MONOTONIC, &tstart);
 276         for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
 277                 uint64_t npages;
 278
 279                 npages = data->pages_per_slot;
 280                 if (slot == max_mem_slots - 1)
 281                         npages += rempages;
 282
 283                 vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
 284                                             guest_addr, slot, npages,
 285                                             0);
 286                 guest_addr += npages * 4096;
 287         }
 288         *slot_runtime = timespec_elapsed(tstart);
 289
 290         for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
 291                 uint64_t npages;
 292                 uint64_t gpa;
 293
 294                 npages = data->pages_per_slot;
 295                 if (slot == max_mem_slots - 2)
 296                         npages += rempages;
 297
 298                 gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
 299                                          slot + 1);
 300                 TEST_ASSERT(gpa == guest_addr,
 301                             "vm_phy_pages_alloc() failed\n");
 302
 303                 data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
 304                 memset(data->hva_slots[slot], 0, npages * 4096);
 305
 306                 guest_addr += npages * 4096;
 307         }
 308
 309         virt_map(data->vm, MEM_GPA, MEM_GPA, mempages, 0);
 310
 311         sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
 312         atomic_init(&sync->start_flag, false);
 313         atomic_init(&sync->exit_flag, false);
 314         atomic_init(&sync->sync_flag, false);
 315
 316         data->mmio_ok = false;
 317
 318         return true;
 319 }
 320
 321 static void launch_vm(struct vm_data *data)
 322 {
 323         pr_info_v("Launching the test VM\n");
 324
 325         pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
 326
 327         /* Ensure the guest thread is spun up. */
 328         wait_for_vcpu();
 329 }
 330
 331 static void free_vm(struct vm_data *data)
 332 {
 333         kvm_vm_free(data->vm);
 334         free(data->hva_slots);
 335         free(data);
 336 }
 337
 338 static void wait_guest_exit(struct vm_data *data)
 339 {
 340         pthread_join(data->vcpu_thread, NULL);
 341 }
 342
 343 static void let_guest_run(struct sync_area *sync)
 344 {
 345         atomic_store_explicit(&sync->start_flag, true, memory_order_release);
 346 }
 347
 348 static void guest_spin_until_start(void)
 349 {
 350         struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
 351
 352         while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
 353                 ;
 354 }
 355
 356 static void make_guest_exit(struct sync_area *sync)
 357 {
 358         atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
 359 }
 360
 361 static bool _guest_should_exit(void)
 362 {
 363         struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
 364
 365         return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
 366 }
 367
 368 #define guest_should_exit() unlikely(_guest_should_exit())
 369
 370 /*
 371  * noinline so we can easily see how much time the host spends waiting
 372  * for the guest.
 373  * For the same reason use alarm() instead of polling clock_gettime()
 374  * to implement a wait timeout.
 375  */
 376 static noinline void host_perform_sync(struct sync_area *sync)
 377 {
 378         alarm(2);
 379
 380         atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
 381         while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
 382                 ;
 383
 384         alarm(0);
 385 }
 386
 387 static bool guest_perform_sync(void)
 388 {
 389         struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
 390         bool expected;
 391
 392         do {
 393                 if (guest_should_exit())
 394                         return false;
 395
 396                 expected = true;
 397         } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
 398                                                         &expected, false,
 399                                                         memory_order_acq_rel,
 400                                                         memory_order_relaxed));
 401
 402         return true;
 403 }
 404
 405 static void guest_code_test_memslot_move(void)
 406 {
 407         struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
 408         uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
 409
 410         GUEST_SYNC(0);
 411
 412         guest_spin_until_start();
 413
 414         while (!guest_should_exit()) {
 415                 uintptr_t ptr;
 416
 417                 for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
 418                      ptr += 4096)
 419                         *(uint64_t *)ptr = MEM_TEST_VAL_1;
 420
 421                 /*
 422                  * No host sync here since the MMIO exits are so expensive
 423                  * that the host would spend most of its time waiting for
 424                  * the guest and so instead of measuring memslot move
 425                  * performance we would measure the performance and
 426                  * likelihood of MMIO exits
 427                  */
 428         }
 429
 430         GUEST_DONE();
 431 }
 432
 433 static void guest_code_test_memslot_map(void)
 434 {
 435         struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
 436
 437         GUEST_SYNC(0);
 438
 439         guest_spin_until_start();
 440
 441         while (1) {
 442                 uintptr_t ptr;
 443
 444                 for (ptr = MEM_TEST_GPA;
 445                      ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
 446                         *(uint64_t *)ptr = MEM_TEST_VAL_1;
 447
 448                 if (!guest_perform_sync())
 449                         break;
 450
 451                 for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
 452                      ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
 453                         *(uint64_t *)ptr = MEM_TEST_VAL_2;
 454
 455                 if (!guest_perform_sync())
 456                         break;
 457         }
 458
 459         GUEST_DONE();
 460 }
 461
 462 static void guest_code_test_memslot_unmap(void)
 463 {
 464         struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
 465
 466         GUEST_SYNC(0);
 467
 468         guest_spin_until_start();
 469
 470         while (1) {
 471                 uintptr_t ptr = MEM_TEST_GPA;
 472
 473                 /*
 474                  * We can afford to access (map) just a small number of pages
 475                  * per host sync as otherwise the host will spend
 476                  * a significant amount of its time waiting for the guest
 477                  * (instead of doing unmap operations), so this will
 478                  * effectively turn this test into a map performance test.
 479                  *
 480                  * Just access a single page to be on the safe side.
 481                  */
 482                 *(uint64_t *)ptr = MEM_TEST_VAL_1;
 483
 484                 if (!guest_perform_sync())
 485                         break;
 486
 487                 ptr += MEM_TEST_UNMAP_SIZE / 2;
 488                 *(uint64_t *)ptr = MEM_TEST_VAL_2;
 489
 490                 if (!guest_perform_sync())
 491                         break;
 492         }
 493
 494         GUEST_DONE();
 495 }
 496
 497 static void guest_code_test_memslot_rw(void)
 498 {
 499         GUEST_SYNC(0);
 500
 501         guest_spin_until_start();
 502
 503         while (1) {
 504                 uintptr_t ptr;
 505
 506                 for (ptr = MEM_TEST_GPA;
 507                      ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
 508                         *(uint64_t *)ptr = MEM_TEST_VAL_1;
 509
 510                 if (!guest_perform_sync())
 511                         break;
 512
 513                 for (ptr = MEM_TEST_GPA + 4096 / 2;
 514                      ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
 515                         uint64_t val = *(uint64_t *)ptr;
 516
 517                         GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
 518                         *(uint64_t *)ptr = 0;
 519                 }
 520
 521                 if (!guest_perform_sync())
 522                         break;
 523         }
 524
 525         GUEST_DONE();
 526 }
 527
 528 static bool test_memslot_move_prepare(struct vm_data *data,
 529                                       struct sync_area *sync,
 530                                       uint64_t *maxslots, bool isactive)
 531 {
 532         uint64_t movesrcgpa, movetestgpa;
 533
 534         movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
 535
 536         if (isactive) {
 537                 uint64_t lastpages;
 538
 539                 vm_gpa2hva(data, movesrcgpa, &lastpages);
 540                 if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
 541                         *maxslots = 0;
 542                         return false;
 543                 }
 544         }
 545
 546         movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
 547         sync->move_area_ptr = (void *)movetestgpa;
 548
 549         if (isactive) {
 550                 data->mmio_ok = true;
 551                 data->mmio_gpa_min = movesrcgpa;
 552                 data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
 553         }
 554
 555         return true;
 556 }
 557
 558 static bool test_memslot_move_prepare_active(struct vm_data *data,
 559                                              struct sync_area *sync,
 560                                              uint64_t *maxslots)
 561 {
 562         return test_memslot_move_prepare(data, sync, maxslots, true);
 563 }
 564
 565 static bool test_memslot_move_prepare_inactive(struct vm_data *data,
 566                                                struct sync_area *sync,
 567                                                uint64_t *maxslots)
 568 {
 569         return test_memslot_move_prepare(data, sync, maxslots, false);
 570 }
 571
 572 static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
 573 {
 574         uint64_t movesrcgpa;
 575
 576         movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
 577         vm_mem_region_move(data->vm, data->nslots - 1 + 1,
 578                            MEM_TEST_MOVE_GPA_DEST);
 579         vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
 580 }
 581
 582 static void test_memslot_do_unmap(struct vm_data *data,
 583                                   uint64_t offsp, uint64_t count)
 584 {
 585         uint64_t gpa, ctr;
 586
 587         for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
 588                 uint64_t npages;
 589                 void *hva;
 590                 int ret;
 591
 592                 hva = vm_gpa2hva(data, gpa, &npages);
 593                 TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
 594                 npages = min(npages, count - ctr);
 595                 ret = madvise(hva, npages * 4096, MADV_DONTNEED);
 596                 TEST_ASSERT(!ret,
 597                             "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
 598                             hva, gpa);
 599                 ctr += npages;
 600                 gpa += npages * 4096;
 601         }
 602         TEST_ASSERT(ctr == count,
 603                     "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
 604 }
 605
 606 static void test_memslot_map_unmap_check(struct vm_data *data,
 607                                          uint64_t offsp, uint64_t valexp)
 608 {
 609         uint64_t gpa;
 610         uint64_t *val;
 611
 612         if (!map_unmap_verify)
 613                 return;
 614
 615         gpa = MEM_TEST_GPA + offsp * 4096;
 616         val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
 617         TEST_ASSERT(*val == valexp,
 618                     "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
 619                     *val, valexp, gpa);
 620         *val = 0;
 621 }
 622
 623 static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
 624 {
 625         /*
 626          * Unmap the second half of the test area while guest writes to (maps)
 627          * the first half.
 628          */
 629         test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
 630                               MEM_TEST_MAP_SIZE_PAGES / 2);
 631
 632         /*
 633          * Wait for the guest to finish writing the first half of the test
 634          * area, verify the written value on the first and the last page of
 635          * this area and then unmap it.
 636          * Meanwhile, the guest is writing to (mapping) the second half of
 637          * the test area.
 638          */
 639         host_perform_sync(sync);
 640         test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
 641         test_memslot_map_unmap_check(data,
 642                                      MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
 643                                      MEM_TEST_VAL_1);
 644         test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);
 645
 646
 647         /*
 648          * Wait for the guest to finish writing the second half of the test
 649          * area and verify the written value on the first and the last page
 650          * of this area.
 651          * The area will be unmapped at the beginning of the next loop
 652          * iteration.
 653          * Meanwhile, the guest is writing to (mapping) the first half of
 654          * the test area.
 655          */
 656         host_perform_sync(sync);
 657         test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
 658                                      MEM_TEST_VAL_2);
 659         test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
 660                                      MEM_TEST_VAL_2);
 661 }
 662
 663 static void test_memslot_unmap_loop_common(struct vm_data *data,
 664                                            struct sync_area *sync,
 665                                            uint64_t chunk)
 666 {
 667         uint64_t ctr;
 668
 669         /*
 670          * Wait for the guest to finish mapping page(s) in the first half
 671          * of the test area, verify the written value and then perform unmap
 672          * of this area.
 673          * Meanwhile, the guest is writing to (mapping) page(s) in the second
 674          * half of the test area.
 675          */
 676         host_perform_sync(sync);
 677         test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
 678         for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
 679                 test_memslot_do_unmap(data, ctr, chunk);
 680
 681         /* Likewise, but for the opposite host / guest areas */
 682         host_perform_sync(sync);
 683         test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
 684                                      MEM_TEST_VAL_2);
 685         for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
 686              ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
 687                 test_memslot_do_unmap(data, ctr, chunk);
 688 }
 689
 690 static void test_memslot_unmap_loop(struct vm_data *data,
 691                                     struct sync_area *sync)
 692 {
 693         test_memslot_unmap_loop_common(data, sync, 1);
 694 }
 695
 696 static void test_memslot_unmap_loop_chunked(struct vm_data *data,
 697                                             struct sync_area *sync)
 698 {
 699         test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
 700 }
 701
 702 static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
 703 {
 704         uint64_t gptr;
 705
 706         for (gptr = MEM_TEST_GPA + 4096 / 2;
 707              gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
 708                 *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
 709
 710         host_perform_sync(sync);
 711
 712         for (gptr = MEM_TEST_GPA;
 713              gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
 714                 uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
 715                 uint64_t val = *vptr;
 716
 717                 TEST_ASSERT(val == MEM_TEST_VAL_1,
 718                             "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
 719                             val, gptr);
 720                 *vptr = 0;
 721         }
 722
 723         host_perform_sync(sync);
 724 }
 725
 726 struct test_data {
 727         const char *name;
 728         uint64_t mem_size;
 729         void (*guest_code)(void);
 730         bool (*prepare)(struct vm_data *data, struct sync_area *sync,
 731                         uint64_t *maxslots);
 732         void (*loop)(struct vm_data *data, struct sync_area *sync);
 733 };
 734
 735 static bool test_execute(int nslots, uint64_t *maxslots,
 736                          unsigned int maxtime,
 737                          const struct test_data *tdata,
 738                          uint64_t *nloops,
 739                          struct timespec *slot_runtime,
 740                          struct timespec *guest_runtime)
 741 {
 742         uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
 743         struct vm_data *data;
 744         struct sync_area *sync;
 745         struct timespec tstart;
 746         bool ret = true;
 747
 748         data = alloc_vm();
 749         if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
 750                         mem_size, slot_runtime)) {
 751                 ret = false;
 752                 goto exit_free;
 753         }
 754
 755         sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
 756
 757         if (tdata->prepare &&
 758             !tdata->prepare(data, sync, maxslots)) {
 759                 ret = false;
 760                 goto exit_free;
 761         }
 762
 763         launch_vm(data);
 764
 765         clock_gettime(CLOCK_MONOTONIC, &tstart);
 766         let_guest_run(sync);
 767
 768         while (1) {
 769                 *guest_runtime = timespec_elapsed(tstart);
 770                 if (guest_runtime->tv_sec >= maxtime)
 771                         break;
 772
 773                 tdata->loop(data, sync);
 774
 775                 (*nloops)++;
 776         }
 777
 778         make_guest_exit(sync);
 779         wait_guest_exit(data);
 780
 781 exit_free:
 782         free_vm(data);
 783
 784         return ret;
 785 }
 786
 787 static const struct test_data tests[] = {
 788         {
 789                 .name = "map",
 790                 .mem_size = MEM_SIZE_MAP_PAGES,
 791                 .guest_code = guest_code_test_memslot_map,
 792                 .loop = test_memslot_map_loop,
 793         },
 794         {
 795                 .name = "unmap",
 796                 .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
 797                 .guest_code = guest_code_test_memslot_unmap,
 798                 .loop = test_memslot_unmap_loop,
 799         },
 800         {
 801                 .name = "unmap chunked",
 802                 .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
 803                 .guest_code = guest_code_test_memslot_unmap,
 804                 .loop = test_memslot_unmap_loop_chunked,
 805         },
 806         {
 807                 .name = "move active area",
 808                 .guest_code = guest_code_test_memslot_move,
 809                 .prepare = test_memslot_move_prepare_active,
 810                 .loop = test_memslot_move_loop,
 811         },
 812         {
 813                 .name = "move inactive area",
 814                 .guest_code = guest_code_test_memslot_move,
 815                 .prepare = test_memslot_move_prepare_inactive,
 816                 .loop = test_memslot_move_loop,
 817         },
 818         {
 819                 .name = "RW",
 820                 .guest_code = guest_code_test_memslot_rw,
 821                 .loop = test_memslot_rw_loop
 822         },
 823 };
 824
 825 #define NTESTS ARRAY_SIZE(tests)
 826
 827 struct test_args {
 828         int tfirst;
 829         int tlast;
 830         int nslots;
 831         int seconds;
 832         int runs;
 833 };
 834
 835 static void help(char *name, struct test_args *targs)
 836 {
 837         int ctr;
 838
 839         pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
 840                 name);
 841         pr_info(" -h: print this help screen.\n");
 842         pr_info(" -v: enable verbose mode (not for benchmarking).\n");
 843         pr_info(" -d: enable extra debug checks.\n");
 844         pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
 845                 targs->nslots);
 846         pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
 847                 targs->tfirst, NTESTS - 1);
 848         pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
 849                 targs->tlast, NTESTS - 1);
 850         pr_info(" -l: specify the test length in seconds (currently: %i)\n",
 851                 targs->seconds);
 852         pr_info(" -r: specify the number of runs per test (currently: %i)\n",
 853                 targs->runs);
 854
 855         pr_info("\nAvailable tests:\n");
 856         for (ctr = 0; ctr < NTESTS; ctr++)
 857                 pr_info("%d: %s\n", ctr, tests[ctr].name);
 858 }
 859
 860 static bool parse_args(int argc, char *argv[],
 861                        struct test_args *targs)
 862 {
 863         int opt;
 864
 865         while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
 866                 switch (opt) {
 867                 case 'h':
 868                 default:
 869                         help(argv[0], targs);
 870                         return false;
 871                 case 'v':
 872                         verbose = true;
 873                         break;
 874                 case 'd':
 875                         map_unmap_verify = true;
 876                         break;
 877                 case 's':
 878                         targs->nslots = atoi(optarg);
 879                         if (targs->nslots <= 0 && targs->nslots != -1) {
 880                                 pr_info("Slot count cap has to be positive or -1 for no cap\n");
 881                                 return false;
 882                         }
 883                         break;
 884                 case 'f':
 885                         targs->tfirst = atoi(optarg);
 886                         if (targs->tfirst < 0) {
 887                                 pr_info("First test to run has to be non-negative\n");
 888                                 return false;
 889                         }
 890                         break;
 891                 case 'e':
 892                         targs->tlast = atoi(optarg);
 893                         if (targs->tlast < 0 || targs->tlast >= NTESTS) {
 894                                 pr_info("Last test to run has to be non-negative and less than %zu\n",
 895                                         NTESTS);
 896                                 return false;
 897                         }
 898                         break;
 899                 case 'l':
 900                         targs->seconds = atoi(optarg);
 901                         if (targs->seconds < 0) {
 902                                 pr_info("Test length in seconds has to be non-negative\n");
 903                                 return false;
 904                         }
 905                         break;
 906                 case 'r':
 907                         targs->runs = atoi(optarg);
 908                         if (targs->runs <= 0) {
 909                                 pr_info("Runs per test has to be positive\n");
 910                                 return false;
 911                         }
 912                         break;
 913                 }
 914         }
 915
 916         if (optind < argc) {
 917                 help(argv[0], targs);
 918                 return false;
 919         }
 920
 921         if (targs->tfirst > targs->tlast) {
 922                 pr_info("First test to run cannot be greater than the last test to run\n");
 923                 return false;
 924         }
 925
 926         return true;
 927 }
 928
 929 struct test_result {
 930         struct timespec slot_runtime, guest_runtime, iter_runtime;
 931         int64_t slottimens, runtimens;
 932         uint64_t nloops;
 933 };
 934
 935 static bool test_loop(const struct test_data *data,
 936                       const struct test_args *targs,
 937                       struct test_result *rbestslottime,
 938                       struct test_result *rbestruntime)
 939 {
 940         uint64_t maxslots;
 941         struct test_result result;
 942
 943         result.nloops = 0;
 944         if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
 945                           &result.nloops,
 946                           &result.slot_runtime, &result.guest_runtime)) {
 947                 if (maxslots)
 948                         pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
 949                                 maxslots);
 950                 else
 951                         pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
 952
 953                 return false;
 954         }
 955
 956         pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
 957                 result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
 958                 result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
 959         if (!result.nloops) {
 960                 pr_info("No full loops done - too short test time or system too loaded?\n");
 961                 return true;
 962         }
 963
 964         result.iter_runtime = timespec_div(result.guest_runtime,
 965                                            result.nloops);
 966         pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
 967                 result.nloops,
 968                 result.iter_runtime.tv_sec,
 969                 result.iter_runtime.tv_nsec);
 970         result.slottimens = timespec_to_ns(result.slot_runtime);
 971         result.runtimens = timespec_to_ns(result.iter_runtime);
 972
 973         /*
 974          * Only rank the slot setup time for tests using the whole test memory
 975          * area so they are comparable
 976          */
 977         if (!data->mem_size &&
 978             (!rbestslottime->slottimens ||
 979              result.slottimens < rbestslottime->slottimens))
 980                 *rbestslottime = result;
 981         if (!rbestruntime->runtimens ||
 982             result.runtimens < rbestruntime->runtimens)
 983                 *rbestruntime = result;
 984
 985         return true;
 986 }
 987
 988 int main(int argc, char *argv[])
 989 {
 990         struct test_args targs = {
 991                 .tfirst = 0,
 992                 .tlast = NTESTS - 1,
 993                 .nslots = -1,
 994                 .seconds = 5,
 995                 .runs = 1,
 996         };
 997         struct test_result rbestslottime;
 998         int tctr;
 999
1000         /* Tell stdout not to buffer its content */
1001         setbuf(stdout, NULL);
1002
1003         if (!parse_args(argc, argv, &targs))
1004                 return -1;
1005
1006         rbestslottime.slottimens = 0;
1007         for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
1008                 const struct test_data *data = &tests[tctr];
1009                 unsigned int runctr;
1010                 struct test_result rbestruntime;
1011
1012                 if (tctr > targs.tfirst)
1013                         pr_info("\n");
1014
1015                 pr_info("Testing %s performance with %i runs, %d seconds each\n",
1016                         data->name, targs.runs, targs.seconds);
1017
1018                 rbestruntime.runtimens = 0;
1019                 for (runctr = 0; runctr < targs.runs; runctr++)
1020                         if (!test_loop(data, &targs,
1021                                        &rbestslottime, &rbestruntime))
1022                                 break;
1023
1024                 if (rbestruntime.runtimens)
1025                         pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
1026                                 rbestruntime.iter_runtime.tv_sec,
1027                                 rbestruntime.iter_runtime.tv_nsec,
1028                                 rbestruntime.nloops);
1029         }
1030
1031         if (rbestslottime.slottimens)
1032                 pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
1033                         rbestslottime.slot_runtime.tv_sec,
1034                         rbestslottime.slot_runtime.tv_nsec);
1035
1036         return 0;
1037 }