/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
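/*
 * VCPU_STAT() expands to an (offset, type) pair, so an entry such as
 * { "exit_null", VCPU_STAT(exit_null) } becomes
 * { "exit_null", offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU };
 * the generic KVM debugfs code uses that offset to read the counter out of
 * each vcpu whenever the corresponding stats file is read.
 */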
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);
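	/*
	 * Each __cpacf_query() above runs the corresponding crypto
	 * instruction in query mode, which stores a bit mask with one bit
	 * per supported function code. Caching these masks here lets KVM
	 * later report to user space exactly which MSA subfunctions the
	 * host can provide to a guest.
	 */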
	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}
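/*
 * Note on the vcpu limits reported above: a basic SCA provides
 * KVM_S390_BSCA_CPU_SLOTS entries (64 in this kernel version), an extended
 * SCA provides KVM_S390_ESCA_CPU_SLOTS (248), and configurations that do
 * not use SCA entries at all can go up to KVM_MAX_VCPUS.
 */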
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
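/*
 * The loop above transfers per-page dirty state from the host page tables
 * into KVM's dirty bitmap: test_and_clear_guest_dirty() checks and resets
 * the dirty state tracked for the guest mapping of one page, and
 * mark_page_dirty() sets the corresponding bit in the memslot bitmap.
 */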
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			kvm->arch.float_int.ais_enabled = 1;
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
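/*
 * Note the recurring pattern above: capabilities that change the guest CPU
 * model (vector registers, runtime instrumentation, AIS, guarded storage)
 * are only accepted while no vcpus exist (or are online) yet, with the
 * check done under kvm->lock, so that every vcpu is created against a
 * consistent facility list.
 */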
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
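/*
 * Enabling keywrapping above generates a fresh random wrapping key mask,
 * so any keys wrapped under a previous mask become unusable. The
 * kvm_for_each_vcpu() loop then reprograms each vcpu's crycb settings and
 * kicks it out of SIE so the new configuration takes effect immediately.
 */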
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
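/*
 * Sizing example for the bitmap allocated above: one bit per guest page,
 * rounded up to a full long. A 4 GiB guest has 4 GiB / 4 KiB = 1048576
 * pages, so vmalloc(ram_pages / 8) allocates 131072 bytes (128 KiB).
 */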
/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_mask[0],
		 kvm->arch.model.fac_mask[1],
		 kvm->arch.model.fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
			 sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
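/*
 * On 64-bit s390, KVM_S390_MAX_BIT_DISTANCE works out to 2 * 8 = 16 pages:
 * when the next dirty page is further away than that, starting a new block
 * (which costs a new base address and length) is cheaper than padding the
 * current block with clean values.
 */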
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.use_cmma) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x3;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}
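/*
 * Each byte written to the output buffer above holds the 2-bit CMMA usage
 * state of one guest page, extracted from the PGSTE usage bits with
 * (pgstev >> 24) & 0x3, i.e. the state the guest set via the ESSA
 * instruction.
 */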
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.use_cmma flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.use_cmma) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.use_cmma = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
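/*
 * Mirror of the extraction in kvm_s390_get_cmma_bits(): each input byte is
 * shifted back into the PGSTE usage-bit position, and only bits selected
 * by both args->mask and _PGSTE_GPS_USAGE_MASK are actually written.
 * Setting mm->context.use_cmma afterwards ensures later CMMA queries treat
 * the data as valid.
 */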
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_get_cmma_bits(kvm, &args);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		r = kvm_s390_set_cmma_bits(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
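/*
 * PQAP(QCI) stores the AP configuration into the 128-byte buffer; bit 0x40
 * of the first byte indicates that the APXA facility is installed. The
 * test_facility(12) check gates the query, facility 12 presumably being
 * the AP query configuration information facility.
 */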
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
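/*
 * The wrapping key masks seeded above are used by the hardware to wrap
 * AES/DEA protected keys for this guest; initializing them with random
 * bytes gives every VM its own wrapping keys, so protected keys cannot be
 * transplanted from one guest to another.
 */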
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);
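	/*
	 * The 16-byte stagger above is presumably meant to spread the SCAs
	 * of different VMs across distinct cache lines within their pages
	 * instead of having every SCA start at offset 0.
	 */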
	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	kvm->arch.float_int.simm = 0;
	kvm->arch.float_int.nimm = 0;
	kvm->arch.float_int.ais_enabled = 0;
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
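/*
 * The *_bit_inv() helpers used above operate on bit numbers counted
 * MSB-first, matching the s390 convention for the mcn mask that marks
 * which SCA entries are valid.
 */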
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}

static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}

static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
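/*
 * The retry loop above is a classic seqcount read side: the writer
 * (kvm_s390_set_cpu_timer() or the start/stop helpers) bumps the sequence
 * around its update, and a reader that observes an odd or changed sequence
 * simply retries. Passing "seq & ~1" makes an in-progress write (odd seq)
 * compare unequal, so the reader loops until the writer is done.
 */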
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}

void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
	return 0;
}

static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
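/*
 * How the kick works, in short: CPUSTAT_STOP_INT forces an intercept at
 * the next interruptible point, and the PROG_IN_SIE bit in prog0c is
 * set by the low-level SIE entry path for as long as the vcpu executes
 * guest context, so the busy-wait above only returns once the vcpu has
 * really left SIE. Callers that must not race with reentry additionally
 * set PROG_BLOCK_SIE or PROG_REQUEST (see kvm_s390_vcpu_block() and
 * kvm_s390_vcpu_request() above) before kicking.
 */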
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
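/*
 * The "2 * PAGE_SIZE" above reflects that the z/Architecture prefix
 * area (the lowcore the guest sees at absolute address 0) spans 8KB,
 * i.e. two 4KB host pages. Both backing pages must stay accessible
 * while the vcpu runs, so invalidation of either one triggers an
 * MMU_RELOAD request for that vcpu.
 */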
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
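/*
 * Userspace side, a minimal sketch: the ONE_REG interface transports a
 * single register value through a struct kvm_one_reg descriptor, e.g.
 * to preset the CPU timer of a stopped vcpu (vcpu_fd is assumed to be
 * an open vcpu file descriptor):
 *
 *	struct kvm_one_reg reg;
 *	__u64 cputm = 0;
 *
 *	reg.id = KVM_REG_S390_CPU_TIMER;
 *	reg.addr = (__u64) &cputm;
 *	ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
 */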
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMMA virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMMA virtualization if CMMA is available and
		 * CMMA has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.use_cmma))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}
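/*
 * Note the control flow above: every serviced request jumps back to
 * "retry" because kvm_s390_vcpu_request_handled() has already cleared
 * PROG_REQUEST, so a concurrent kvm_s390_sync_request() may have queued
 * new work in the meantime. Only a pass that finds vcpu->requests empty
 * is allowed to fall through and let the vcpu enter SIE again.
 */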
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
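/*
 * The epoch is the signed offset that SIE adds to the host TOD clock
 * whenever the guest reads its clock:
 *
 *	guest_tod = host_tod + epoch
 *
 * so storing "tod - get_tod_clock()" above makes a subsequent guest
 * STCK observe (approximately) the requested value "tod".
 */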
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
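/*
 * Pfault handshake, in short: if the guest enabled pseudo-page-fault
 * handling (the CR0 bit checked above) and handed us a token address
 * via the PFTOKEN register or KVM_SYNC_PFAULT, we read the 8-byte token
 * from guest real storage and let the generic async_pf machinery fault
 * the page in in the background. The guest gets a PFAULT_INIT external
 * interrupt when the fault is queued (kvm_arch_async_page_not_present)
 * and a PFAULT_DONE one when it resolves, so it can reschedule another
 * task instead of blocking on the host page fault.
 */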
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
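/*
 * Entry/exit discipline around sie64a(), summarized: the kvm->srcu read
 * lock is dropped while the guest runs (memslot protection must not be
 * held across a potentially long SIE interval), interrupts are disabled
 * over guest_enter/guest_exit so the PF_VCPU accounting state cannot be
 * observed half-updated from an interrupt, and host CPU-timer
 * accounting is switched off while the hardware accounts guest time.
 */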
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->valid &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}
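/*
 * The fprs/vrs handling above is the lazy FPU switch: the host register
 * contents are parked in vcpu->arch.host_fpregs and current->thread.fpu
 * is simply re-pointed at the guest's register image in kvm_run, so the
 * hardware registers are (re)loaded from there by the low-level switch
 * code on the way into the guest and written back by save_fpu_regs() in
 * store_regs() below. No per-entry memcpy of 16 vector pairs is needed.
 */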
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
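/*
 * Layout note: the __LC_*_SAVE_AREA offsets above address the
 * architected store-status save area slots in absolute lowcore. The
 * clock comparator is shifted right by 8 before storing because the
 * save area keeps only the leftmost portion of its value, matching
 * what the hardware store-status operation would write.
 */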
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
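/*
 * Rationale for the IBS toggling in start/stop above: the IBS facility
 * lets the machine run a guest in a faster single-CPU mode that is only
 * valid while at most one vcpu is running. Hence the bookkeeping:
 * enable IBS when exactly one running vcpu remains, and disable it on
 * all vcpus as soon as a second one starts.
 */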
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
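/*
 * Userspace usage, a minimal sketch: read 256 bytes of guest logical
 * memory through access register 0, with fields as defined for struct
 * kvm_s390_mem_op in <linux/kvm.h>. guest_addr, buffer and vcpu_fd are
 * assumptions of the example, not part of this file:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64) buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */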
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
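/*
 * Worked example for nonhyp_mask(): hmfai carries one 2-bit code per
 * facility-list doubleword, leftmost code first. "(sclp.hmfai << i * 2)
 * >> 30" isolates code i, and the final shift widens that code to a
 * 16-bit step: code 0 keeps the low 48 bits (0x0000ffffffffffff),
 * code 1 keeps 32 bits, code 2 keeps 16 bits, and code 3 masks
 * everything. The result limits which host facility bits may be made
 * visible to guests in kvm_s390_init() below.
 */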
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");