// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2021 Amazon.com, Inc. or its affiliates.
 *
 * Xen shared_info / pvclock testing
 */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"

#include <stdint.h>
#include <time.h>
#include <sched.h>
#include <signal.h>

#include <sys/eventfd.h>

#define VCPU_ID		5
#define SHINFO_REGION_GVA	0xc0000000ULL
#define SHINFO_REGION_GPA	0xc0000000ULL
#define SHINFO_REGION_SLOT	10
#define PAGE_SIZE		4096

#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (2 * PAGE_SIZE))
#define DUMMY_REGION_SLOT	11
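/*
 * The shared_info page occupies the first page of the SHINFO region; the
 * second page holds the pvclock time info (PVTIME_ADDR) and the runstate
 * area (RUNSTATE_ADDR). The dummy region beyond them exists only so the
 * memslot-change test can perturb the VM's memslots.
 */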
#define SHINFO_ADDR	(SHINFO_REGION_GPA)
#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)

#define SHINFO_VADDR	(SHINFO_REGION_GVA)
#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)
#define EVTCHN_VECTOR	0x10

static struct kvm_vm *vm;

#define XEN_HYPERCALL_MSR	0x40000000

#define MIN_STEAL_TIME	50000
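/*
 * MIN_STEAL_TIME is in nanoseconds: the host's sched_yield() loop must
 * accumulate at least this much run-delay before the guest's steal-time
 * assertion can pass.
 *
 * The structures below are local copies of the Xen ABI, so the test does
 * not depend on Xen's own headers.
 */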
struct pvclock_vcpu_time_info {
	u32 version;
	u32 pad0;
	u64 tsc_timestamp;
	u64 system_time;
	u32 tsc_to_system_mul;
	s8  tsc_shift;
	u8  flags;
	u8  pad[2];
} __attribute__((__packed__)); /* 32 bytes */
struct pvclock_wall_clock {
	u32 version;
	u32 sec;
	u32 nsec;
} __attribute__((__packed__));
struct vcpu_runstate_info {
	uint32_t state;
	uint64_t state_entry_time;
	uint64_t time[4];
};
struct arch_vcpu_info {
	unsigned long cr2;
	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
};
struct vcpu_info {
	uint8_t evtchn_upcall_pending;
	uint8_t evtchn_upcall_mask;
	unsigned long evtchn_pending_sel;
	struct arch_vcpu_info arch;
	struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */
struct shared_info {
	struct vcpu_info vcpu_info[32];
	unsigned long evtchn_pending[64];
	unsigned long evtchn_mask[64];
	struct pvclock_wall_clock wc;
	uint32_t wc_sec_hi;
	/* arch_shared_info here */
};
#define RUNSTATE_running  0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked  2
#define RUNSTATE_offline  3

static const char *runstate_names[] = {
	"running",
	"runnable",
	"blocked",
	"offline"
};
struct {
	struct kvm_irq_routing info;
	struct kvm_irq_routing_entry entries[2];
} irq_routes;
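/*
 * Guest-side upcall handler: it acknowledges the event channel upcall by
 * clearing the per-vCPU pending flags, then reports delivery to the host
 * via GUEST_SYNC(0x20).
 */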
static void evtchn_handler(struct ex_regs *regs)
{
	struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
	vi->evtchn_upcall_pending = 0;
	vi->evtchn_pending_sel = 0;

	GUEST_SYNC(0x20);
}
static void guest_code(void)
{
	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;

	/* Enable interrupts so the upcall vector can be delivered */
	__asm__ __volatile__(
		"sti\n"
		"nop\n"
	);

	/* Trigger an interrupt injection */
	GUEST_SYNC(0);
	/* Test having the host set runstates manually */
	GUEST_SYNC(RUNSTATE_runnable);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_blocked);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
	GUEST_ASSERT(rs->state == 0);

	GUEST_SYNC(RUNSTATE_offline);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
	GUEST_ASSERT(rs->state == 0);
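	/*
	 * For each of the three GUEST_SYNCs above, the host forces the named
	 * runstate; by the time the guest runs again KVM has moved it back
	 * to RUNSTATE_running (0), so only the accumulated time[] buckets
	 * record that the forced state ever existed.
	 */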
	/* Test runstate time adjust */
	GUEST_SYNC(4);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
	/* Test runstate time set */
	GUEST_SYNC(5);
	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
	/* sched_yield() should result in some 'runnable' time */
	GUEST_SYNC(6);
	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
	/* Attempt to deliver a *masked* interrupt */
	GUEST_SYNC(7);

	/* Wait until we see the bit set */
	struct shared_info *si = (void *)SHINFO_VADDR;
	while (!si->evtchn_pending[0])
		__asm__ __volatile__ ("rep nop" : : : "memory");
	/* Now deliver an *unmasked* interrupt */
	GUEST_SYNC(8);

	while (!si->evtchn_pending[1])
		__asm__ __volatile__ ("rep nop" : : : "memory");
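	/*
	 * Each unsigned long in evtchn_pending[] covers 64 event channel
	 * ports, so port 15 lands in word 0 and port 66 in word 1; that is
	 * why the two waits above poll different words of the array.
	 */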
	/* Change memslots and deliver an interrupt */
	GUEST_SYNC(9);

	for (;;)
		__asm__ __volatile__ ("rep nop" : : : "memory");
}
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
	if (a->tv_sec > b->tv_sec)
		return 1;
	else if (a->tv_sec < b->tv_sec)
		return -1;
	else if (a->tv_nsec > b->tv_nsec)
		return 1;
	else if (a->tv_nsec < b->tv_nsec)
		return -1;
	else
		return 0;
}
static void handle_alrm(int sig)
{
	TEST_FAIL("IRQ delivery timed out");
}
int main(int argc, char *argv[])
{
	struct timespec min_ts, max_ts, vm_ts;
	bool verbose;

	verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
			       !strncmp(argv[1], "--verbose", 10));
	int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
	if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO)) {
		print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
		exit(KSFT_SKIP);
	}

	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
	clock_gettime(CLOCK_REALTIME, &min_ts);

	vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());

	/* Map a region for the shared_info page */
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);

	struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
	int zero_fd = open("/dev/zero", O_RDONLY);
	TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
	struct kvm_xen_hvm_config hvmc = {
		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
		.msr = XEN_HYPERCALL_MSR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
	struct kvm_xen_hvm_attr lm = {
		.type = KVM_XEN_ATTR_TYPE_LONG_MODE,
		.u.long_mode = 1,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
	struct kvm_xen_hvm_attr ha = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
	/*
	 * Test what happens when the HVA of the shinfo page is remapped after
	 * the kernel has a reference to it. But make sure we copy the clock
	 * info over since that's only set at setup time, and we test it later.
	 */
	struct pvclock_wall_clock wc_copy = shinfo->wc;
	void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
	TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
	shinfo->wc = wc_copy;
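	/*
	 * The MAP_FIXED mapping above replaces the page backing the
	 * shared_info HVA, so on next use the kernel must notice that its
	 * cached translation of the gfn is stale and re-resolve it.
	 */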
	struct kvm_xen_vcpu_attr vi = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
		.u.gpa = VCPU_INFO_ADDR,
	};
	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);

	struct kvm_xen_vcpu_attr pvclock = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
		.u.gpa = PVTIME_ADDR,
	};
	vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);

	struct kvm_xen_hvm_attr vec = {
		.type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
		.u.vector = EVTCHN_VECTOR,
	};
	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(vm, VCPU_ID);
	vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
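	/*
	 * The upcall vector registered with KVM above is an ordinary IDT
	 * vector in the guest; evtchn_handler() is installed on it so event
	 * channel delivery can be observed and acknowledged from guest code.
	 */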
	if (do_runstate_tests) {
		struct kvm_xen_vcpu_attr st = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
			.u.gpa = RUNSTATE_ADDR,
		};
		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
	}
	int irq_fd[2] = { -1, -1 };

	if (do_eventfd_tests) {
		irq_fd[0] = eventfd(0, 0);
		irq_fd[1] = eventfd(0, 0);

		/* Unexpected, but not a KVM failure */
		if (irq_fd[0] == -1 || irq_fd[1] == -1)
			do_eventfd_tests = false;
	}
	if (do_eventfd_tests) {
		irq_routes.info.nr = 2;

		irq_routes.entries[0].gsi = 32;
		irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[0].u.xen_evtchn.port = 15;
		irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
		irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		irq_routes.entries[1].gsi = 33;
		irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
		irq_routes.entries[1].u.xen_evtchn.port = 66;
		irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
		irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;

		vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);
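		/*
		 * Two userspace GSIs now route directly to Xen event
		 * channels: GSI 32 to port 15 (word 0 of evtchn_pending)
		 * and GSI 33 to port 66 (word 1), both targeting this vCPU
		 * at 2-level priority.
		 */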
		struct kvm_irqfd ifd = { };

		ifd.fd = irq_fd[0];
		ifd.gsi = 32;
		vm_ioctl(vm, KVM_IRQFD, &ifd);

		ifd.fd = irq_fd[1];
		ifd.gsi = 33;
		vm_ioctl(vm, KVM_IRQFD, &ifd);
	}
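	/*
	 * With the irqfds bound, an eventfd_write() on irq_fd[0]/irq_fd[1]
	 * raises GSI 32/33, which KVM translates into setting the routed
	 * event channel's pending bit in the shared_info page.
	 */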
	struct sigaction sa = { };
	sa.sa_handler = handle_alrm;
	sigaction(SIGALRM, &sa, NULL);
	struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
	vinfo->evtchn_upcall_pending = 0;

	struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
	rs->state = 0x5a;

	bool evtchn_irq_expected = false;

	for (;;) {
		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
		struct ucall uc;
		vcpu_run(vm, VCPU_ID);

		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
			    run->exit_reason,
			    exit_reason_str(run->exit_reason));
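		/*
		 * Each GUEST_SYNC() in guest_code arrives here as a
		 * UCALL_SYNC with the phase number in uc.args[1]; the
		 * switch below drives the host side of each phase.
		 */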
		switch (get_ucall(vm, VCPU_ID, &uc)) {
		case UCALL_ABORT:
			TEST_FAIL("%s", (const char *)uc.args[0]);
			/* NOT REACHED */
		case UCALL_SYNC: {
			struct kvm_xen_vcpu_attr rst;
			long rundelay;

			if (do_runstate_tests)
				TEST_ASSERT(rs->state_entry_time == rs->time[0] +
					    rs->time[1] + rs->time[2] + rs->time[3],
					    "runstate times don't add up");
			switch (uc.args[1]) {
			case 0:
				if (verbose)
					printf("Delivering evtchn upcall\n");
				evtchn_irq_expected = true;
				vinfo->evtchn_upcall_pending = 1;
				break;
			case RUNSTATE_runnable...RUNSTATE_offline:
				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
				if (!do_runstate_tests)
					goto done;
				if (verbose)
					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
				rst.u.runstate.state = uc.args[1];
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;
			case 4:
				if (verbose)
					printf("Testing RUNSTATE_ADJUST\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = (uint64_t)-1;
				rst.u.runstate.time_blocked =
					0x5a - rs->time[RUNSTATE_blocked];
				rst.u.runstate.time_offline =
					0x6b6b - rs->time[RUNSTATE_offline];
				rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
					rst.u.runstate.time_offline;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;
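				/*
				 * The ADJUST above applies signed deltas:
				 * blocked and offline are nudged to exactly
				 * 0x5a and 0x6b6b, and runnable absorbs the
				 * negated sum so that state_entry_time still
				 * equals the total of the four time buckets.
				 */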
			case 5:
				if (verbose)
					printf("Testing RUNSTATE_DATA\n");
				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
				memset(&rst.u, 0, sizeof(rst.u));
				rst.u.runstate.state = RUNSTATE_running;
				rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
				rst.u.runstate.time_blocked = 0x6b6b;
				rst.u.runstate.time_offline = 0x5a;
				vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
				break;
			case 6:
				if (verbose)
					printf("Testing steal time\n");
				/* Yield until scheduler delay exceeds target */
				rundelay = get_run_delay() + MIN_STEAL_TIME;
				do {
					sched_yield();
				} while (get_run_delay() < rundelay);
				break;
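				/*
				 * get_run_delay() reads this task's
				 * accumulated scheduler run delay, so
				 * yielding until it grows by MIN_STEAL_TIME
				 * guarantees the guest will observe at least
				 * that much 'runnable' time.
				 */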
			case 7:
				if (!do_eventfd_tests)
					goto done;
				if (verbose)
					printf("Testing masked event channel\n");
				shinfo->evtchn_mask[0] = 0x8000;
				eventfd_write(irq_fd[0], 1UL);
				alarm(1);
				break;
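				/*
				 * Bit 15 of evtchn_mask[0] masks the routed
				 * port, so the write above should set the
				 * pending bit without delivering an upcall;
				 * the guest polls for the pending bit instead
				 * of taking an interrupt.
				 */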
			case 8:
				if (verbose)
					printf("Testing unmasked event channel\n");
				/* Unmask that, but deliver the other one */
				shinfo->evtchn_pending[0] = 0;
				shinfo->evtchn_mask[0] = 0;
				eventfd_write(irq_fd[1], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;
			case 9:
				if (verbose)
					printf("Testing event channel after memslot change\n");
				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
							    DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
				eventfd_write(irq_fd[0], 1UL);
				evtchn_irq_expected = true;
				alarm(1);
				break;
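				/*
				 * Adding a memslot invalidates KVM's cached
				 * translation of the shared_info page;
				 * delivery via the same event channel must
				 * still work afterwards.
				 */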
			case 0x20:
				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
				evtchn_irq_expected = false;
				if (shinfo->evtchn_pending[1] &&
				    shinfo->evtchn_pending[0])
					goto done;
				break;
			}
			break;
		}
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
		}
	}

 done:
	/* Cancel any pending IRQ-delivery timeout before the final checks */
	alarm(0);
	clock_gettime(CLOCK_REALTIME, &max_ts);
	/*
	 * Just a *really* basic check that things are being put in the
	 * right place. The actual calculations are much the same for
	 * Xen as they are for the KVM variants, so no need to check.
	 */
	struct pvclock_wall_clock *wc;
	struct pvclock_vcpu_time_info *ti, *ti2;

	wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
	ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
	ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
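	/*
	 * Offset 0xc00 is where wc lives inside shared_info: 32 vcpu_info
	 * entries of 64 bytes (0x800) plus two 512-byte evtchn arrays.
	 * 0x40 + 0x20 is the time field of vcpu_info[1], matching the
	 * VCPU_INFO_ADDR registered earlier.
	 */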
	if (verbose) {
		printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
		printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
		       ti->tsc_shift, ti->flags);
		printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
		       ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
		       ti2->tsc_shift, ti2->flags);
	}
	vm_ts.tv_sec = wc->sec;
	vm_ts.tv_nsec = wc->nsec;
	TEST_ASSERT(wc->version && !(wc->version & 1),
		    "Bad wallclock version %x", wc->version);
	TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
	TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
	TEST_ASSERT(ti->version && !(ti->version & 1),
		    "Bad time_info version %x", ti->version);
	TEST_ASSERT(ti2->version && !(ti2->version & 1),
		    "Bad time_info version %x", ti2->version);
	if (do_runstate_tests) {
		/*
		 * Fetch runstate and check sanity. Strictly speaking in the
		 * general case we might not expect the numbers to be identical
		 * but in this case we know we aren't running the vCPU any more.
		 */
		struct kvm_xen_vcpu_attr rst = {
			.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
		};
		vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
		if (verbose) {
			printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
			       rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
			       rs->state, rs->state_entry_time);
			for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
				printf("State %s: %" PRIu64 " ns\n",
				       runstate_names[i], rs->time[i]);
			}
		}
		TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
		TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
			    "State entry time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
			    "Running time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
			    "Runnable time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
			    "Blocked time mismatch");
		TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
			    "Offline time mismatch");

		TEST_ASSERT(rs->state_entry_time == rs->time[0] +
			    rs->time[1] + rs->time[2] + rs->time[3],
			    "runstate times don't add up");
	}

	kvm_vm_free(vm);
	return 0;
}