Merge tag 'for-linus-5.17-1' of https://github.com/cminyard/linux-ipmi
[linux-2.6-microblaze.git] / tools / testing / selftests / kvm / x86_64 / xen_shinfo_test.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * xen_shinfo_test
4  *
5  * Copyright © 2021 Amazon.com, Inc. or its affiliates.
6  *
7  * Xen shared_info / pvclock testing
8  */
9
10 #include "test_util.h"
11 #include "kvm_util.h"
12 #include "processor.h"
13
14 #include <stdint.h>
15 #include <time.h>
16 #include <sched.h>
17 #include <signal.h>
18
19 #include <sys/eventfd.h>
20
/* ID of the single vCPU created and run by main(). */
#define VCPU_ID         5

/*
 * Two-page guest memory region holding the Xen shared_info page followed by
 * the page used for the pvclock and runstate areas. main() maps the GVA
 * range onto the GPA range with virt_map(); both bases have the same value.
 */
#define SHINFO_REGION_GVA       0xc0000000ULL
#define SHINFO_REGION_GPA       0xc0000000ULL
#define SHINFO_REGION_SLOT      10
#define PAGE_SIZE               4096

/* Extra one-page memslot that main() adds in the "memslot change" step. */
#define DUMMY_REGION_GPA        (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT       11

/* Guest-physical addresses of the individual structures in the region. */
#define SHINFO_ADDR     (SHINFO_REGION_GPA)
#define PVTIME_ADDR     (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR   (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR  (SHINFO_REGION_GPA + 0x40)

/* Guest-virtual addresses of the same structures, as used by the guest code. */
#define SHINFO_VADDR    (SHINFO_REGION_GVA)
#define RUNSTATE_VADDR  (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)

/*
 * Vector that main() registers as the Xen upcall vector and on which it
 * installs evtchn_handler().
 */
#define EVTCHN_VECTOR   0x10

/* The VM under test; created in main(). */
static struct kvm_vm *vm;

/* MSR index passed to KVM in kvm_xen_hvm_config.msr. */
#define XEN_HYPERCALL_MSR       0x40000000

/*
 * Amount by which get_run_delay() must grow during the steal time step, and
 * the minimum 'runnable' time the guest then expects to observe.
 */
#define MIN_STEAL_TIME          50000
47
/*
 * Per-vCPU pvclock time record as read back from guest memory by main().
 *
 * The structure is packed and exactly 32 bytes long. It uses the <stdint.h>
 * fixed-width types like the other guest-visible structures in this file.
 * main() only checks that 'version' is non-zero and even; the remaining
 * fields are merely printed in verbose mode.
 */
struct pvclock_vcpu_time_info {
        uint32_t version;
        uint32_t pad0;
        uint64_t tsc_timestamp;
        uint64_t system_time;
        uint32_t tsc_to_system_mul;
        int8_t   tsc_shift;
        uint8_t  flags;
        uint8_t  pad[2];
} __attribute__((__packed__)); /* 32 bytes */
58
/*
 * Wall clock record embedded in struct shared_info.
 *
 * Packed, 12 bytes. Uses the <stdint.h> fixed-width types like the other
 * guest-visible structures in this file. main() checks that 'version' is
 * non-zero and even and that sec/nsec lie between the host CLOCK_REALTIME
 * samples taken before VM creation and after the run.
 */
struct pvclock_wall_clock {
        uint32_t version;
        uint32_t sec;
        uint32_t nsec;
} __attribute__((__packed__));
64
/*
 * Runstate area located at RUNSTATE_ADDR / RUNSTATE_VADDR.
 *
 * Both the guest and the host in this test index time[] with the
 * RUNSTATE_* values, and main() asserts that state_entry_time equals the
 * sum of all four time[] entries.
 */
struct vcpu_runstate_info {
        /* Current runstate, one of RUNSTATE_* */
        uint32_t state;
        /* Printed by main() with an "ns" suffix */
        uint64_t state_entry_time;
        /* Accumulated time per runstate, indexed by RUNSTATE_* */
        uint64_t time[4];
};
70
/* Architecture specific part of struct vcpu_info; not accessed by this test. */
struct arch_vcpu_info {
        unsigned long cr2;
        /* Padding so that sizeof(vcpu_info_t) == 64 */
        unsigned long pad;
};
75
/*
 * Per-vCPU information block; shared_info holds an array of 32 of these.
 *
 * The test registers the block at VCPU_INFO_ADDR with KVM. The host sets
 * evtchn_upcall_pending to request an upcall and the guest's
 * evtchn_handler() clears evtchn_upcall_pending and evtchn_pending_sel
 * again. main() reads 'time' at offset 0x20 from the start of the block.
 */
struct vcpu_info {
        uint8_t evtchn_upcall_pending;
        uint8_t evtchn_upcall_mask;
        unsigned long evtchn_pending_sel;
        struct arch_vcpu_info arch;
        struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */
83
/*
 * Layout of the shared_info page as used by this test.
 *
 * The host side manipulates evtchn_pending[] and evtchn_mask[] directly and
 * the guest polls evtchn_pending[]. main() locates the wall clock ('wc') at
 * offset 0xc00 from the start of the page.
 */
struct shared_info {
        struct vcpu_info vcpu_info[32];
        unsigned long evtchn_pending[64];
        unsigned long evtchn_mask[64];
        struct pvclock_wall_clock wc;
        uint32_t wc_sec_hi;
        /* arch_shared_info here */
};
92
/*
 * Runstate values. They double as indices into vcpu_runstate_info.time[]
 * and into runstate_names[], and RUNSTATE_runnable..RUNSTATE_offline are
 * also used as GUEST_SYNC() step numbers by guest_code().
 */
#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3
97
/*
 * Human readable names for the runstates, indexed by the RUNSTATE_* values.
 * Only used for verbose output. The table itself is const as well as the
 * strings, since nothing is meant to modify it.
 */
static const char *const runstate_names[] = {
        "running",
        "runnable",
        "blocked",
        "offline",
};
104
105 struct {
106         struct kvm_irq_routing info;
107         struct kvm_irq_routing_entry entries[2];
108 } irq_routes;
109
110 static void evtchn_handler(struct ex_regs *regs)
111 {
112         struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
113         vi->evtchn_upcall_pending = 0;
114         vi->evtchn_pending_sel = 0;
115
116         GUEST_SYNC(0x20);
117 }
118
/*
 * Guest entry point.
 *
 * Walks through the test scenario one step at a time. Each GUEST_SYNC(n)
 * hands control to the host loop in main(), which performs the action
 * selected by n; on return the guest checks the resulting contents of the
 * runstate area or waits for event channel bits in shared_info. Event
 * channel upcalls are reported separately by evtchn_handler().
 *
 * Never returns: after the last step the guest spins, and main() ends the
 * test from the host side.
 */
static void guest_code(void)
{
        struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;

        /* Enable interrupts so that the upcall vector can be delivered */
        __asm__ __volatile__(
                "sti\n"
                "nop\n"
        );

        /* Trigger an interrupt injection */
        GUEST_SYNC(0);

        /* Test having the host set runstates manually */
        GUEST_SYNC(RUNSTATE_runnable);
        GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
        GUEST_ASSERT(rs->state == 0);

        GUEST_SYNC(RUNSTATE_blocked);
        GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
        GUEST_ASSERT(rs->state == 0);

        GUEST_SYNC(RUNSTATE_offline);
        GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
        GUEST_ASSERT(rs->state == 0);

        /* Test runstate time adjust */
        GUEST_SYNC(4);
        GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
        GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);

        /* Test runstate time set */
        GUEST_SYNC(5);
        GUEST_ASSERT(rs->state_entry_time >= 0x8000);
        GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
        GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
        GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);

        /* sched_yield() should result in some 'runnable' time */
        GUEST_SYNC(6);
        GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);

        /* Attempt to deliver a *masked* interrupt */
        GUEST_SYNC(7);

        /* Wait until we see the bit set */
        struct shared_info *si = (void *)SHINFO_VADDR;
        while (!si->evtchn_pending[0])
                __asm__ __volatile__ ("rep nop" : : : "memory");

        /* Now deliver an *unmasked* interrupt */
        GUEST_SYNC(8);

        /* Wait for the second event channel to show up as pending */
        while (!si->evtchn_pending[1])
                __asm__ __volatile__ ("rep nop" : : : "memory");

        /* Change memslots and deliver an interrupt */
        GUEST_SYNC(9);

        /* Spin; the host finishes the test once the final upcall is seen */
        for (;;)
                __asm__ __volatile__ ("rep nop" : : : "memory");
}
180
/*
 * Three-way comparison of two timespecs.
 *
 * Seconds are compared first; nanoseconds only break a tie.
 *
 * Returns 1 if *a is later than *b, -1 if it is earlier and 0 if both are
 * equal.
 */
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
        if (a->tv_sec != b->tv_sec)
                return a->tv_sec > b->tv_sec ? 1 : -1;

        if (a->tv_nsec != b->tv_nsec)
                return a->tv_nsec > b->tv_nsec ? 1 : -1;

        return 0;
}
194
/*
 * SIGALRM handler, installed by main() when the eventfd tests are enabled.
 *
 * main() arms a one second alarm() whenever it signals an event channel, so
 * getting here means the test did not make the expected progress in time.
 * @sig is unused.
 */
static void handle_alrm(int sig)
{
        TEST_FAIL("IRQ delivery timed out");
}
199
200 int main(int argc, char *argv[])
201 {
202         struct timespec min_ts, max_ts, vm_ts;
203         bool verbose;
204
205         verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
206                                !strncmp(argv[1], "--verbose", 10));
207
208         int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
209         if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
210                 print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
211                 exit(KSFT_SKIP);
212         }
213
214         bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
215         bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
216
217         clock_gettime(CLOCK_REALTIME, &min_ts);
218
219         vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
220         vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
221
222         /* Map a region for the shared_info page */
223         vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
224                                     SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
225         virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
226
227         struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
228
229         int zero_fd = open("/dev/zero", O_RDONLY);
230         TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
231
232         struct kvm_xen_hvm_config hvmc = {
233                 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
234                 .msr = XEN_HYPERCALL_MSR,
235         };
236         vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
237
238         struct kvm_xen_hvm_attr lm = {
239                 .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
240                 .u.long_mode = 1,
241         };
242         vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
243
244         struct kvm_xen_hvm_attr ha = {
245                 .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
246                 .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
247         };
248         vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
249
250         /*
251          * Test what happens when the HVA of the shinfo page is remapped after
252          * the kernel has a reference to it. But make sure we copy the clock
253          * info over since that's only set at setup time, and we test it later.
254          */
255         struct pvclock_wall_clock wc_copy = shinfo->wc;
256         void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
257         TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
258         shinfo->wc = wc_copy;
259
260         struct kvm_xen_vcpu_attr vi = {
261                 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
262                 .u.gpa = VCPU_INFO_ADDR,
263         };
264         vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
265
266         struct kvm_xen_vcpu_attr pvclock = {
267                 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
268                 .u.gpa = PVTIME_ADDR,
269         };
270         vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
271
272         struct kvm_xen_hvm_attr vec = {
273                 .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
274                 .u.vector = EVTCHN_VECTOR,
275         };
276         vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
277
278         vm_init_descriptor_tables(vm);
279         vcpu_init_descriptor_tables(vm, VCPU_ID);
280         vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
281
282         if (do_runstate_tests) {
283                 struct kvm_xen_vcpu_attr st = {
284                         .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
285                         .u.gpa = RUNSTATE_ADDR,
286                 };
287                 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
288         }
289
290         int irq_fd[2] = { -1, -1 };
291
292         if (do_eventfd_tests) {
293                 irq_fd[0] = eventfd(0, 0);
294                 irq_fd[1] = eventfd(0, 0);
295
296                 /* Unexpected, but not a KVM failure */
297                 if (irq_fd[0] == -1 || irq_fd[1] == -1)
298                         do_eventfd_tests = false;
299         }
300
301         if (do_eventfd_tests) {
302                 irq_routes.info.nr = 2;
303
304                 irq_routes.entries[0].gsi = 32;
305                 irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
306                 irq_routes.entries[0].u.xen_evtchn.port = 15;
307                 irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
308                 irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
309
310                 irq_routes.entries[1].gsi = 33;
311                 irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
312                 irq_routes.entries[1].u.xen_evtchn.port = 66;
313                 irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
314                 irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
315
316                 vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);
317
318                 struct kvm_irqfd ifd = { };
319
320                 ifd.fd = irq_fd[0];
321                 ifd.gsi = 32;
322                 vm_ioctl(vm, KVM_IRQFD, &ifd);
323
324                 ifd.fd = irq_fd[1];
325                 ifd.gsi = 33;
326                 vm_ioctl(vm, KVM_IRQFD, &ifd);
327
328                 struct sigaction sa = { };
329                 sa.sa_handler = handle_alrm;
330                 sigaction(SIGALRM, &sa, NULL);
331         }
332
333         struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
334         vinfo->evtchn_upcall_pending = 0;
335
336         struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
337         rs->state = 0x5a;
338
339         bool evtchn_irq_expected = false;
340
341         for (;;) {
342                 volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
343                 struct ucall uc;
344
345                 vcpu_run(vm, VCPU_ID);
346
347                 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
348                             "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
349                             run->exit_reason,
350                             exit_reason_str(run->exit_reason));
351
352                 switch (get_ucall(vm, VCPU_ID, &uc)) {
353                 case UCALL_ABORT:
354                         TEST_FAIL("%s", (const char *)uc.args[0]);
355                         /* NOT REACHED */
356                 case UCALL_SYNC: {
357                         struct kvm_xen_vcpu_attr rst;
358                         long rundelay;
359
360                         if (do_runstate_tests)
361                                 TEST_ASSERT(rs->state_entry_time == rs->time[0] +
362                                             rs->time[1] + rs->time[2] + rs->time[3],
363                                             "runstate times don't add up");
364
365                         switch (uc.args[1]) {
366                         case 0:
367                                 if (verbose)
368                                         printf("Delivering evtchn upcall\n");
369                                 evtchn_irq_expected = true;
370                                 vinfo->evtchn_upcall_pending = 1;
371                                 break;
372
373                         case RUNSTATE_runnable...RUNSTATE_offline:
374                                 TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
375                                 if (!do_runstate_tests)
376                                         goto done;
377                                 if (verbose)
378                                         printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
379                                 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
380                                 rst.u.runstate.state = uc.args[1];
381                                 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
382                                 break;
383
384                         case 4:
385                                 if (verbose)
386                                         printf("Testing RUNSTATE_ADJUST\n");
387                                 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
388                                 memset(&rst.u, 0, sizeof(rst.u));
389                                 rst.u.runstate.state = (uint64_t)-1;
390                                 rst.u.runstate.time_blocked =
391                                         0x5a - rs->time[RUNSTATE_blocked];
392                                 rst.u.runstate.time_offline =
393                                         0x6b6b - rs->time[RUNSTATE_offline];
394                                 rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
395                                         rst.u.runstate.time_offline;
396                                 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
397                                 break;
398
399                         case 5:
400                                 if (verbose)
401                                         printf("Testing RUNSTATE_DATA\n");
402                                 rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
403                                 memset(&rst.u, 0, sizeof(rst.u));
404                                 rst.u.runstate.state = RUNSTATE_running;
405                                 rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
406                                 rst.u.runstate.time_blocked = 0x6b6b;
407                                 rst.u.runstate.time_offline = 0x5a;
408                                 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
409                                 break;
410
411                         case 6:
412                                 if (verbose)
413                                         printf("Testing steal time\n");
414                                 /* Yield until scheduler delay exceeds target */
415                                 rundelay = get_run_delay() + MIN_STEAL_TIME;
416                                 do {
417                                         sched_yield();
418                                 } while (get_run_delay() < rundelay);
419                                 break;
420
421                         case 7:
422                                 if (!do_eventfd_tests)
423                                         goto done;
424                                 if (verbose)
425                                         printf("Testing masked event channel\n");
426                                 shinfo->evtchn_mask[0] = 0x8000;
427                                 eventfd_write(irq_fd[0], 1UL);
428                                 alarm(1);
429                                 break;
430
431                         case 8:
432                                 if (verbose)
433                                         printf("Testing unmasked event channel\n");
434                                 /* Unmask that, but deliver the other one */
435                                 shinfo->evtchn_pending[0] = 0;
436                                 shinfo->evtchn_mask[0] = 0;
437                                 eventfd_write(irq_fd[1], 1UL);
438                                 evtchn_irq_expected = true;
439                                 alarm(1);
440                                 break;
441
442                         case 9:
443                                 if (verbose)
444                                         printf("Testing event channel after memslot change\n");
445                                 vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
446                                                             DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
447                                 eventfd_write(irq_fd[0], 1UL);
448                                 evtchn_irq_expected = true;
449                                 alarm(1);
450                                 break;
451
452                         case 0x20:
453                                 TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
454                                 evtchn_irq_expected = false;
455                                 if (shinfo->evtchn_pending[1] &&
456                                     shinfo->evtchn_pending[0])
457                                         goto done;
458                                 break;
459                         }
460                         break;
461                 }
462                 case UCALL_DONE:
463                         goto done;
464                 default:
465                         TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
466                 }
467         }
468
469  done:
470         clock_gettime(CLOCK_REALTIME, &max_ts);
471
472         /*
473          * Just a *really* basic check that things are being put in the
474          * right place. The actual calculations are much the same for
475          * Xen as they are for the KVM variants, so no need to check.
476          */
477         struct pvclock_wall_clock *wc;
478         struct pvclock_vcpu_time_info *ti, *ti2;
479
480         wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
481         ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
482         ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
483
484         if (verbose) {
485                 printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
486                 printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
487                        ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
488                        ti->tsc_shift, ti->flags);
489                 printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
490                        ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
491                        ti2->tsc_shift, ti2->flags);
492         }
493
494         vm_ts.tv_sec = wc->sec;
495         vm_ts.tv_nsec = wc->nsec;
496         TEST_ASSERT(wc->version && !(wc->version & 1),
497                     "Bad wallclock version %x", wc->version);
498         TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
499         TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
500
501         TEST_ASSERT(ti->version && !(ti->version & 1),
502                     "Bad time_info version %x", ti->version);
503         TEST_ASSERT(ti2->version && !(ti2->version & 1),
504                     "Bad time_info version %x", ti->version);
505
506         if (do_runstate_tests) {
507                 /*
508                  * Fetch runstate and check sanity. Strictly speaking in the
509                  * general case we might not expect the numbers to be identical
510                  * but in this case we know we aren't running the vCPU any more.
511                  */
512                 struct kvm_xen_vcpu_attr rst = {
513                         .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
514                 };
515                 vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
516
517                 if (verbose) {
518                         printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
519                                rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
520                                rs->state, rs->state_entry_time);
521                         for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
522                                 printf("State %s: %" PRIu64 " ns\n",
523                                        runstate_names[i], rs->time[i]);
524                         }
525                 }
526                 TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
527                 TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
528                             "State entry time mismatch");
529                 TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
530                             "Running time mismatch");
531                 TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
532                             "Runnable time mismatch");
533                 TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
534                             "Blocked time mismatch");
535                 TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
536                             "Offline time mismatch");
537
538                 TEST_ASSERT(rs->state_entry_time == rs->time[0] +
539                             rs->time[1] + rs->time[2] + rs->time[3],
540                             "runstate times don't add up");
541         }
542         kvm_vm_free(vm);
543         return 0;
544 }