// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}

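/*
 * Illustrative sketch (not in the original source): both arrays above are
 * plain bitmaps, so the effective facility mask a VM may use is a word-wise
 * OR of the base facilities and the cpu-model-enabled extension facilities,
 * roughly:
 *
 *	for (i = 0; i < kvm_s390_fac_size(); i++)
 *		mask[i] = kvm_s390_fac_base[i] | kvm_s390_fac_ext[i];
 */
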
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

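/*
 * Worked example (illustrative, not in the original source): the guest TOD
 * is host TOD + epoch. If the host TOD jumps forward by delta = 5, adding
 * -5 to each epoch preserves the sum, and thus the guest-visible clock:
 * (tod + 5) + (epoch - 5) == tod + epoch.
 */
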
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		rc = -ENOMEM;
		goto out_debug_unreg;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out_debug_unreg;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out_gib_destroy;

	return 0;

out_gib_destroy:
	kvm_s390_gib_destroy();
out_debug_unreg:
	debug_unregister(kvm_s390_dbf);
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

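/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): KVM_S390_ENABLE_SIE is issued on the /dev/kvm fd itself, e.g.
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *
 *	ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0);
 */
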
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}

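/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): userspace probes these capabilities with KVM_CHECK_EXTENSION,
 * whose return value is the "r" computed above, e.g.
 *
 *	int max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// max > 0: KVM_S390_MEM_OP is available, with transfers up to "max"
 */
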
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

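/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): these are VM-wide capabilities, enabled on the VM fd, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */
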
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

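/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): these attributes arrive via the KVM_SET_DEVICE_ATTR vm ioctl,
 * e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
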
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

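/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): migration mode is toggled through the KVM_S390_VM_MIGRATION
 * attribute group, e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
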
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

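/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): reading the guest TOD in the extended format, e.g.
 *
 *	struct kvm_s390_vm_tod_clock gtod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)(unsigned long)&gtod,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */
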
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

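/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): storage keys are read and written in bulk via the vm ioctls,
 * e.g.
 *
 *	uint8_t buf[128];
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 */
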
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}

static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}

static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}

static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}

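/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): a migration tool would drain the CMMA values roughly like this;
 * the kernel updates start_gfn to the first gfn it actually reported:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buflen,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *
 *	do {
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		// consume log.count values for gfns starting at log.start_gfn
 *		log.start_gfn += log.count;
 *		log.count = buflen;
 *	} while (log.remaining);
 */
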
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

2180 static int kvm_s390_apxa_installed(void)
2182 struct ap_config_info info;
2184 if (ap_instructions_available()) {
2185 if (ap_qci(&info) == 0)
2193 * The format of the crypto control block (CRYCB) is specified in the 3 low
2194 * order bits of the CRYCB designation (CRYCBD) field as follows:
2195 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2196 * AP extended addressing (APXA) facility are installed.
2197 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2198 * Format 2: Both the APXA and MSAX3 facilities are installed.
2200 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2202 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2204 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2205 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2207 /* Check whether MSAX3 is installed */
2208 if (!test_kvm_facility(kvm, 76))
2211 if (kvm_s390_apxa_installed())
2212 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2214 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
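/*
 * Worked example for the format selection above: the CRYCB lives in
 * sie_page2 and is sufficiently aligned, so the low-order bits of its
 * address are free to carry the format. On a machine with facility 76
 * (MSAX3) and APXA, a CRYCB at e.g. 0x12345600 yields a CRYCBD of
 * 0x12345600 | CRYCB_FORMAT2, and the format bits tell SIE whether the
 * APCB0 or APCB1 layout applies.
 */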
2217 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2218 unsigned long *aqm, unsigned long *adm)
2220 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2222 mutex_lock(&kvm->lock);
2223 kvm_s390_vcpu_block_all(kvm);
2225 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2226 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2227 memcpy(crycb->apcb1.apm, apm, 32);
2228 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2229 apm[0], apm[1], apm[2], apm[3]);
2230 memcpy(crycb->apcb1.aqm, aqm, 32);
2231 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2232 aqm[0], aqm[1], aqm[2], aqm[3]);
2233 memcpy(crycb->apcb1.adm, adm, 32);
2234 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2235 adm[0], adm[1], adm[2], adm[3]);
2238 case CRYCB_FORMAT0: /* fall through - both use APCB0 */
2239 memcpy(crycb->apcb0.apm, apm, 8);
2240 memcpy(crycb->apcb0.aqm, aqm, 2);
2241 memcpy(crycb->apcb0.adm, adm, 2);
2242 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2243 apm[0], *((unsigned short *)aqm),
2244 *((unsigned short *)adm));
2246 default: /* cannot happen */
2250 /* recreate the shadow crycb for each vcpu */
2251 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2252 kvm_s390_vcpu_unblock_all(kvm);
2253 mutex_unlock(&kvm->lock);
2255 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2257 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2259 mutex_lock(&kvm->lock);
2260 kvm_s390_vcpu_block_all(kvm);
2262 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2263 sizeof(kvm->arch.crypto.crycb->apcb0));
2264 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2265 sizeof(kvm->arch.crypto.crycb->apcb1));
2267 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2268 /* recreate the shadow crycb for each vcpu */
2269 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2270 kvm_s390_vcpu_unblock_all(kvm);
2271 mutex_unlock(&kvm->lock);
2273 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2275 static u64 kvm_s390_get_initial_cpuid(void)
2280 cpuid.version = 0xff;
2281 return *((u64 *) &cpuid);
2284 static void kvm_s390_crypto_init(struct kvm *kvm)
2286 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2287 kvm_s390_set_crycb_format(kvm);
2289 if (!test_kvm_facility(kvm, 76))
2292 /* Enable AES/DEA protected key functions by default */
2293 kvm->arch.crypto.aes_kw = 1;
2294 kvm->arch.crypto.dea_kw = 1;
2295 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2296 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2297 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2298 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2301 static void sca_dispose(struct kvm *kvm)
2303 if (kvm->arch.use_esca)
2304 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2306 free_page((unsigned long)(kvm->arch.sca));
2307 kvm->arch.sca = NULL;
2310 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2312 gfp_t alloc_flags = GFP_KERNEL;
2314 char debug_name[16];
2315 static unsigned long sca_offset;
2318 #ifdef CONFIG_KVM_S390_UCONTROL
2319 if (type & ~KVM_VM_S390_UCONTROL)
2321 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2328 rc = s390_enable_sie();
2334 if (!sclp.has_64bscao)
2335 alloc_flags |= GFP_DMA;
2336 rwlock_init(&kvm->arch.sca_lock);
2337 /* start with basic SCA */
2338 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2341 spin_lock(&kvm_lock);
2343 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2345 kvm->arch.sca = (struct bsca_block *)
2346 ((char *) kvm->arch.sca + sca_offset);
2347 spin_unlock(&kvm_lock);
2349 sprintf(debug_name, "kvm-%u", current->pid);
2351 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2355 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2356 kvm->arch.sie_page2 =
2357 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2358 if (!kvm->arch.sie_page2)
2361 kvm->arch.sie_page2->kvm = kvm;
2362 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2364 for (i = 0; i < kvm_s390_fac_size(); i++) {
2365 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2366 (kvm_s390_fac_base[i] |
2367 kvm_s390_fac_ext[i]);
2368 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2369 kvm_s390_fac_base[i];
2371 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2373 /* we are always in czam mode - even on pre-z14 machines */
2374 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2375 set_kvm_facility(kvm->arch.model.fac_list, 138);
2376 /* we emulate STHYI in kvm */
2377 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2378 set_kvm_facility(kvm->arch.model.fac_list, 74);
2379 if (MACHINE_HAS_TLB_GUEST) {
2380 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2381 set_kvm_facility(kvm->arch.model.fac_list, 147);
2384 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2385 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2387 kvm_s390_crypto_init(kvm);
2389 mutex_init(&kvm->arch.float_int.ais_lock);
2390 spin_lock_init(&kvm->arch.float_int.lock);
2391 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2392 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2393 init_waitqueue_head(&kvm->arch.ipte_wq);
2394 mutex_init(&kvm->arch.ipte_mutex);
2396 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2397 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2399 if (type & KVM_VM_S390_UCONTROL) {
2400 kvm->arch.gmap = NULL;
2401 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2403 if (sclp.hamax == U64_MAX)
2404 kvm->arch.mem_limit = TASK_SIZE_MAX;
2406 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2408 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2409 if (!kvm->arch.gmap)
2411 kvm->arch.gmap->private = kvm;
2412 kvm->arch.gmap->pfault_enabled = 0;
2415 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2416 kvm->arch.use_skf = sclp.has_skey;
2417 spin_lock_init(&kvm->arch.start_stop_lock);
2418 kvm_s390_vsie_init(kvm);
2419 kvm_s390_gisa_init(kvm);
2420 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2424 free_page((unsigned long)kvm->arch.sie_page2);
2425 debug_unregister(kvm->arch.dbf);
2427 KVM_EVENT(3, "creation of vm failed: %d", rc);
2431 bool kvm_arch_has_vcpu_debugfs(void)
2436 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2441 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2443 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2444 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2445 kvm_s390_clear_local_irqs(vcpu);
2446 kvm_clear_async_pf_completion_queue(vcpu);
2447 if (!kvm_is_ucontrol(vcpu->kvm))
2450 if (kvm_is_ucontrol(vcpu->kvm))
2451 gmap_remove(vcpu->arch.gmap);
2453 if (vcpu->kvm->arch.use_cmma)
2454 kvm_s390_vcpu_unsetup_cmma(vcpu);
2455 free_page((unsigned long)(vcpu->arch.sie_block));
2457 kvm_vcpu_uninit(vcpu);
2458 kmem_cache_free(kvm_vcpu_cache, vcpu);
2461 static void kvm_free_vcpus(struct kvm *kvm)
2464 struct kvm_vcpu *vcpu;
2466 kvm_for_each_vcpu(i, vcpu, kvm)
2467 kvm_arch_vcpu_destroy(vcpu);
2469 mutex_lock(&kvm->lock);
2470 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2471 kvm->vcpus[i] = NULL;
2473 atomic_set(&kvm->online_vcpus, 0);
2474 mutex_unlock(&kvm->lock);
2477 void kvm_arch_destroy_vm(struct kvm *kvm)
2479 kvm_free_vcpus(kvm);
2481 debug_unregister(kvm->arch.dbf);
2482 kvm_s390_gisa_destroy(kvm);
2483 free_page((unsigned long)kvm->arch.sie_page2);
2484 if (!kvm_is_ucontrol(kvm))
2485 gmap_remove(kvm->arch.gmap);
2486 kvm_s390_destroy_adapters(kvm);
2487 kvm_s390_clear_float_irqs(kvm);
2488 kvm_s390_vsie_destroy(kvm);
2489 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2492 /* Section: vcpu related */
2493 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2495 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2496 if (!vcpu->arch.gmap)
2498 vcpu->arch.gmap->private = vcpu->kvm;
2503 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2505 if (!kvm_s390_use_sca_entries())
2507 read_lock(&vcpu->kvm->arch.sca_lock);
2508 if (vcpu->kvm->arch.use_esca) {
2509 struct esca_block *sca = vcpu->kvm->arch.sca;
2511 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2512 sca->cpu[vcpu->vcpu_id].sda = 0;
2514 struct bsca_block *sca = vcpu->kvm->arch.sca;
2516 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2517 sca->cpu[vcpu->vcpu_id].sda = 0;
2519 read_unlock(&vcpu->kvm->arch.sca_lock);
2522 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2524 if (!kvm_s390_use_sca_entries()) {
2525 struct bsca_block *sca = vcpu->kvm->arch.sca;
2527 /* we still need the basic sca for the ipte control */
2528 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2529 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2532 read_lock(&vcpu->kvm->arch.sca_lock);
2533 if (vcpu->kvm->arch.use_esca) {
2534 struct esca_block *sca = vcpu->kvm->arch.sca;
2536 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2537 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2538 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2539 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2540 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2542 struct bsca_block *sca = vcpu->kvm->arch.sca;
2544 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2545 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2546 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2547 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2549 read_unlock(&vcpu->kvm->arch.sca_lock);
2552 /* Basic SCA to Extended SCA data copy routines */
2553 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2556 d->sigp_ctrl.c = s->sigp_ctrl.c;
2557 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2560 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2564 d->ipte_control = s->ipte_control;
2566 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2567 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2570 static int sca_switch_to_extended(struct kvm *kvm)
2572 struct bsca_block *old_sca = kvm->arch.sca;
2573 struct esca_block *new_sca;
2574 struct kvm_vcpu *vcpu;
2575 unsigned int vcpu_idx;
2578 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2582 scaoh = (u32)((u64)(new_sca) >> 32);
2583 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2585 kvm_s390_vcpu_block_all(kvm);
2586 write_lock(&kvm->arch.sca_lock);
2588 sca_copy_b_to_e(new_sca, old_sca);
2590 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2591 vcpu->arch.sie_block->scaoh = scaoh;
2592 vcpu->arch.sie_block->scaol = scaol;
2593 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2595 kvm->arch.sca = new_sca;
2596 kvm->arch.use_esca = 1;
2598 write_unlock(&kvm->arch.sca_lock);
2599 kvm_s390_vcpu_unblock_all(kvm);
2601 free_page((unsigned long)old_sca);
2603 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2604 old_sca, kvm->arch.sca);
2608 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2612 if (!kvm_s390_use_sca_entries()) {
2613 if (id < KVM_MAX_VCPUS)
2617 if (id < KVM_S390_BSCA_CPU_SLOTS)
2619 if (!sclp.has_esca || !sclp.has_64bscao)
2622 mutex_lock(&kvm->lock);
2623 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2624 mutex_unlock(&kvm->lock);
2626 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
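/*
 * For orientation: a basic SCA has KVM_S390_BSCA_CPU_SLOTS (64) entries,
 * an extended SCA KVM_S390_ESCA_CPU_SLOTS (248). Creating the first vcpu
 * with an id beyond the basic range is what triggers the one-way
 * sca_switch_to_extended() above.
 */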
2629 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2631 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2632 kvm_clear_async_pf_completion_queue(vcpu);
2633 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2639 kvm_s390_set_prefix(vcpu, 0);
2640 if (test_kvm_facility(vcpu->kvm, 64))
2641 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2642 if (test_kvm_facility(vcpu->kvm, 82))
2643 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2644 if (test_kvm_facility(vcpu->kvm, 133))
2645 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2646 if (test_kvm_facility(vcpu->kvm, 156))
2647 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2648 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2649 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2652 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2654 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2656 if (kvm_is_ucontrol(vcpu->kvm))
2657 return __kvm_ucontrol_vcpu_init(vcpu);
2662 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2663 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2665 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2666 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2667 vcpu->arch.cputm_start = get_tod_clock_fast();
2668 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2671 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2672 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2674 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2675 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2676 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2677 vcpu->arch.cputm_start = 0;
2678 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2681 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2682 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2684 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2685 vcpu->arch.cputm_enabled = true;
2686 __start_cpu_timer_accounting(vcpu);
2689 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2690 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2692 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2693 __stop_cpu_timer_accounting(vcpu);
2694 vcpu->arch.cputm_enabled = false;
2697 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2699 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2700 __enable_cpu_timer_accounting(vcpu);
2704 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2706 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2707 __disable_cpu_timer_accounting(vcpu);
2711 /* set the cpu timer - may only be called from the VCPU thread itself */
2712 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2714 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2715 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2716 if (vcpu->arch.cputm_enabled)
2717 vcpu->arch.cputm_start = get_tod_clock_fast();
2718 vcpu->arch.sie_block->cputm = cputm;
2719 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2723 /* update and get the cpu timer - can also be called from other VCPU threads */
2724 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2729 if (unlikely(!vcpu->arch.cputm_enabled))
2730 return vcpu->arch.sie_block->cputm;
2732 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2734 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2736 * If the writer would ever execute a read in the critical
2737 * section, e.g. in irq context, we have a deadlock.
2739 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2740 value = vcpu->arch.sie_block->cputm;
2741 /* if cputm_start is 0, accounting is being started/stopped */
2742 if (likely(vcpu->arch.cputm_start))
2743 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2744 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
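/*
 * Note on the retry above: raw_read_seqcount() does not spin for an
 * even (quiescent) count, so the read may start while the writer is
 * active (odd seq); masking the low bit (seq & ~1) then guarantees
 * read_seqcount_retry() reports a mismatch, and the loop simply
 * rereads once the writer is done.
 */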
2749 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2752 gmap_enable(vcpu->arch.enabled_gmap);
2753 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2754 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2755 __start_cpu_timer_accounting(vcpu);
2759 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2762 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2763 __stop_cpu_timer_accounting(vcpu);
2764 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2765 vcpu->arch.enabled_gmap = gmap_get_enabled();
2766 gmap_disable(vcpu->arch.enabled_gmap);
2770 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2772 /* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2773 vcpu->arch.sie_block->gpsw.mask = 0UL;
2774 vcpu->arch.sie_block->gpsw.addr = 0UL;
2775 kvm_s390_set_prefix(vcpu, 0);
2776 kvm_s390_set_cpu_timer(vcpu, 0);
2777 vcpu->arch.sie_block->ckc = 0UL;
2778 vcpu->arch.sie_block->todpr = 0;
2779 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2780 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
2781 CR0_INTERRUPT_KEY_SUBMASK |
2782 CR0_MEASUREMENT_ALERT_SUBMASK;
2783 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2785 CR14_EXTERNAL_DAMAGE_SUBMASK;
2786 /* make sure the new fpc will be lazily loaded */
2788 current->thread.fpu.fpc = 0;
2789 vcpu->arch.sie_block->gbea = 1;
2790 vcpu->arch.sie_block->pp = 0;
2791 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2792 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2793 kvm_clear_async_pf_completion_queue(vcpu);
2794 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2795 kvm_s390_vcpu_stop(vcpu);
2796 kvm_s390_clear_local_irqs(vcpu);
2799 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2801 mutex_lock(&vcpu->kvm->lock);
2803 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2804 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2806 mutex_unlock(&vcpu->kvm->lock);
2807 if (!kvm_is_ucontrol(vcpu->kvm)) {
2808 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2811 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2812 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2813 /* make vcpu_load load the right gmap on the first trigger */
2814 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2817 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2820 * If the AP instructions are not being interpreted and the MSAX3
2821 * facility is not configured for the guest, there is nothing to set up.
2823 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2826 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2827 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2828 vcpu->arch.sie_block->eca &= ~ECA_APIE;
2830 if (vcpu->kvm->arch.crypto.apie)
2831 vcpu->arch.sie_block->eca |= ECA_APIE;
2833 /* Set up protected key support */
2834 if (vcpu->kvm->arch.crypto.aes_kw)
2835 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2836 if (vcpu->kvm->arch.crypto.dea_kw)
2837 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2840 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2842 free_page(vcpu->arch.sie_block->cbrlo);
2843 vcpu->arch.sie_block->cbrlo = 0;
2846 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2848 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2849 if (!vcpu->arch.sie_block->cbrlo)
2854 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2856 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2858 vcpu->arch.sie_block->ibc = model->ibc;
2859 if (test_kvm_facility(vcpu->kvm, 7))
2860 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2863 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2867 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2871 if (test_kvm_facility(vcpu->kvm, 78))
2872 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2873 else if (test_kvm_facility(vcpu->kvm, 8))
2874 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2876 kvm_s390_vcpu_setup_model(vcpu);
2878 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2879 if (MACHINE_HAS_ESOP)
2880 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2881 if (test_kvm_facility(vcpu->kvm, 9))
2882 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2883 if (test_kvm_facility(vcpu->kvm, 73))
2884 vcpu->arch.sie_block->ecb |= ECB_TE;
2886 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2887 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2888 if (test_kvm_facility(vcpu->kvm, 130))
2889 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2890 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2892 vcpu->arch.sie_block->eca |= ECA_CEI;
2894 vcpu->arch.sie_block->eca |= ECA_IB;
2896 vcpu->arch.sie_block->eca |= ECA_SII;
2897 if (sclp.has_sigpif)
2898 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2899 if (test_kvm_facility(vcpu->kvm, 129)) {
2900 vcpu->arch.sie_block->eca |= ECA_VX;
2901 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2903 if (test_kvm_facility(vcpu->kvm, 139))
2904 vcpu->arch.sie_block->ecd |= ECD_MEF;
2905 if (test_kvm_facility(vcpu->kvm, 156))
2906 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2907 if (vcpu->arch.sie_block->gd) {
2908 vcpu->arch.sie_block->eca |= ECA_AIV;
2909 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2910 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2912 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2914 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2917 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2919 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2921 if (vcpu->kvm->arch.use_cmma) {
2922 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2926 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2927 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2929 vcpu->arch.sie_block->hpid = HPID_KVM;
2931 kvm_s390_vcpu_crypto_setup(vcpu);
2936 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2939 struct kvm_vcpu *vcpu;
2940 struct sie_page *sie_page;
2943 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2948 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2952 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2953 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2957 vcpu->arch.sie_block = &sie_page->sie_block;
2958 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2960 /* the real guest size will always be smaller than msl */
2961 vcpu->arch.sie_block->mso = 0;
2962 vcpu->arch.sie_block->msl = sclp.hamax;
2964 vcpu->arch.sie_block->icpua = id;
2965 spin_lock_init(&vcpu->arch.local_int.lock);
2966 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
2967 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2968 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2969 seqcount_init(&vcpu->arch.cputm_seqcount);
2971 rc = kvm_vcpu_init(vcpu, kvm, id);
2973 goto out_free_sie_block;
2974 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2975 vcpu->arch.sie_block);
2976 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2980 free_page((unsigned long)(vcpu->arch.sie_block));
2982 kmem_cache_free(kvm_vcpu_cache, vcpu);
2987 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2989 return kvm_s390_vcpu_has_irq(vcpu, 0);
2992 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2994 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2997 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2999 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3003 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3005 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3008 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3010 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3014 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3016 return atomic_read(&vcpu->arch.sie_block->prog20) &
3017 (PROG_BLOCK_SIE | PROG_REQUEST);
3020 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3022 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3026 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3027 * If the CPU is not running (e.g. waiting as idle) the function will
3028 * return immediately. */
3029 void exit_sie(struct kvm_vcpu *vcpu)
3031 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3032 kvm_s390_vsie_kick(vcpu);
3033 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3037 /* Kick a guest cpu out of SIE to process a request synchronously */
3038 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3040 kvm_make_request(req, vcpu);
3041 kvm_s390_vcpu_request(vcpu);
3044 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3047 struct kvm *kvm = gmap->private;
3048 struct kvm_vcpu *vcpu;
3049 unsigned long prefix;
3052 if (gmap_is_shadow(gmap))
3054 if (start >= 1UL << 31)
3055 /* We are only interested in prefix pages */
3057 kvm_for_each_vcpu(i, vcpu, kvm) {
3058 /* match against both prefix pages */
3059 prefix = kvm_s390_get_prefix(vcpu);
3060 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3061 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3063 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3068 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3070 /* kvm common code refers to this, but never calls it */
3075 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3076 struct kvm_one_reg *reg)
3081 case KVM_REG_S390_TODPR:
3082 r = put_user(vcpu->arch.sie_block->todpr,
3083 (u32 __user *)reg->addr);
3085 case KVM_REG_S390_EPOCHDIFF:
3086 r = put_user(vcpu->arch.sie_block->epoch,
3087 (u64 __user *)reg->addr);
3089 case KVM_REG_S390_CPU_TIMER:
3090 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3091 (u64 __user *)reg->addr);
3093 case KVM_REG_S390_CLOCK_COMP:
3094 r = put_user(vcpu->arch.sie_block->ckc,
3095 (u64 __user *)reg->addr);
3097 case KVM_REG_S390_PFTOKEN:
3098 r = put_user(vcpu->arch.pfault_token,
3099 (u64 __user *)reg->addr);
3101 case KVM_REG_S390_PFCOMPARE:
3102 r = put_user(vcpu->arch.pfault_compare,
3103 (u64 __user *)reg->addr);
3105 case KVM_REG_S390_PFSELECT:
3106 r = put_user(vcpu->arch.pfault_select,
3107 (u64 __user *)reg->addr);
3109 case KVM_REG_S390_PP:
3110 r = put_user(vcpu->arch.sie_block->pp,
3111 (u64 __user *)reg->addr);
3113 case KVM_REG_S390_GBEA:
3114 r = put_user(vcpu->arch.sie_block->gbea,
3115 (u64 __user *)reg->addr);
3124 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3125 struct kvm_one_reg *reg)
3131 case KVM_REG_S390_TODPR:
3132 r = get_user(vcpu->arch.sie_block->todpr,
3133 (u32 __user *)reg->addr);
3135 case KVM_REG_S390_EPOCHDIFF:
3136 r = get_user(vcpu->arch.sie_block->epoch,
3137 (u64 __user *)reg->addr);
3139 case KVM_REG_S390_CPU_TIMER:
3140 r = get_user(val, (u64 __user *)reg->addr);
3142 kvm_s390_set_cpu_timer(vcpu, val);
3144 case KVM_REG_S390_CLOCK_COMP:
3145 r = get_user(vcpu->arch.sie_block->ckc,
3146 (u64 __user *)reg->addr);
3148 case KVM_REG_S390_PFTOKEN:
3149 r = get_user(vcpu->arch.pfault_token,
3150 (u64 __user *)reg->addr);
3151 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3152 kvm_clear_async_pf_completion_queue(vcpu);
3154 case KVM_REG_S390_PFCOMPARE:
3155 r = get_user(vcpu->arch.pfault_compare,
3156 (u64 __user *)reg->addr);
3158 case KVM_REG_S390_PFSELECT:
3159 r = get_user(vcpu->arch.pfault_select,
3160 (u64 __user *)reg->addr);
3162 case KVM_REG_S390_PP:
3163 r = get_user(vcpu->arch.sie_block->pp,
3164 (u64 __user *)reg->addr);
3166 case KVM_REG_S390_GBEA:
3167 r = get_user(vcpu->arch.sie_block->gbea,
3168 (u64 __user *)reg->addr);
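/*
 * Illustrative userspace sketch (not part of this file) of the ONE_REG
 * accessors above, reading the guest CPU timer; "vcpu_fd" is a
 * hypothetical vcpu file descriptor from KVM_CREATE_VCPU.
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int read_cpu_timer(int vcpu_fd, uint64_t *val)
 *	{
 *		struct kvm_one_reg reg = {
 *			.id = KVM_REG_S390_CPU_TIMER,
 *			.addr = (uint64_t)(uintptr_t)val,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *	}
 */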
3177 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3179 kvm_s390_vcpu_initial_reset(vcpu);
3183 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3186 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3191 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3194 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3199 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3200 struct kvm_sregs *sregs)
3204 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3205 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3211 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3212 struct kvm_sregs *sregs)
3216 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3217 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3223 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3229 if (test_fp_ctl(fpu->fpc)) {
3233 vcpu->run->s.regs.fpc = fpu->fpc;
3235 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3236 (freg_t *) fpu->fprs);
3238 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3245 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3249 /* make sure we have the latest values */
3252 convert_vx_to_fp((freg_t *) fpu->fprs,
3253 (__vector128 *) vcpu->run->s.regs.vrs);
3255 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3256 fpu->fpc = vcpu->run->s.regs.fpc;
3262 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3266 if (!is_vcpu_stopped(vcpu))
3269 vcpu->run->psw_mask = psw.mask;
3270 vcpu->run->psw_addr = psw.addr;
3275 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3276 struct kvm_translation *tr)
3278 return -EINVAL; /* not implemented yet */
3281 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3282 KVM_GUESTDBG_USE_HW_BP | \
3283 KVM_GUESTDBG_ENABLE)
3285 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3286 struct kvm_guest_debug *dbg)
3292 vcpu->guest_debug = 0;
3293 kvm_s390_clear_bp_data(vcpu);
3295 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3299 if (!sclp.has_gpere) {
3304 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3305 vcpu->guest_debug = dbg->control;
3306 /* enforce guest PER */
3307 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3309 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3310 rc = kvm_s390_import_bp_data(vcpu, dbg);
3312 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3313 vcpu->arch.guestdbg.last_bp = 0;
3317 vcpu->guest_debug = 0;
3318 kvm_s390_clear_bp_data(vcpu);
3319 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
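/*
 * Illustrative userspace sketch (not part of this file): enabling
 * single-step debugging through the interface above; "vcpu_fd" is again
 * a hypothetical vcpu file descriptor.
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
 *		perror("KVM_SET_GUEST_DEBUG");
 */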
3327 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3328 struct kvm_mp_state *mp_state)
3334 /* CHECK_STOP and LOAD are not supported yet */
3335 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3336 KVM_MP_STATE_OPERATING;
3342 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3343 struct kvm_mp_state *mp_state)
3349 /* user space knows about this interface - let it control the state */
3350 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3352 switch (mp_state->mp_state) {
3353 case KVM_MP_STATE_STOPPED:
3354 kvm_s390_vcpu_stop(vcpu);
3356 case KVM_MP_STATE_OPERATING:
3357 kvm_s390_vcpu_start(vcpu);
3359 case KVM_MP_STATE_LOAD:
3360 case KVM_MP_STATE_CHECK_STOP:
3361 /* fall through - CHECK_STOP and LOAD are not supported yet */
3370 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3372 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3375 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3378 kvm_s390_vcpu_request_handled(vcpu);
3379 if (!kvm_request_pending(vcpu))
3382 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3383 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3384 * This ensures that the ipte instruction for this request has
3385 * already finished. We might race against a second unmapper that
3386 * wants to set the blocking bit. Let's just retry the request loop.
3388 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3390 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3391 kvm_s390_get_prefix(vcpu),
3392 PAGE_SIZE * 2, PROT_WRITE);
3394 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3400 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3401 vcpu->arch.sie_block->ihcpu = 0xffff;
3405 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3406 if (!ibs_enabled(vcpu)) {
3407 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3408 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3413 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3414 if (ibs_enabled(vcpu)) {
3415 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3416 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3421 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3422 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3426 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3428 * Disable CMM virtualization; we will emulate the ESSA
3429 * instruction manually, in order to provide the additional
3430 * functionality needed for live migration.
3432 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3436 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3438 * Re-enable CMM virtualization if CMMA is available and
3439 * CMM has been used.
3441 if ((vcpu->kvm->arch.use_cmma) &&
3442 (vcpu->kvm->mm->context.uses_cmm))
3443 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3447 /* nothing to do, just clear the request */
3448 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3449 /* we left the vsie handler, nothing to do, just clear the request */
3450 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3455 void kvm_s390_set_tod_clock(struct kvm *kvm,
3456 const struct kvm_s390_vm_tod_clock *gtod)
3458 struct kvm_vcpu *vcpu;
3459 struct kvm_s390_tod_clock_ext htod;
3462 mutex_lock(&kvm->lock);
3465 get_tod_clock_ext((char *)&htod);
3467 kvm->arch.epoch = gtod->tod - htod.tod;
3469 if (test_kvm_facility(kvm, 139)) {
3470 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3471 if (kvm->arch.epoch > gtod->tod)
3472 kvm->arch.epdx -= 1;
3475 kvm_s390_vcpu_block_all(kvm);
3476 kvm_for_each_vcpu(i, vcpu, kvm) {
3477 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3478 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3481 kvm_s390_vcpu_unblock_all(kvm);
3483 mutex_unlock(&kvm->lock);
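/*
 * Worked example for the epoch arithmetic above (mod 2^64): with a
 * requested guest TOD of 0x10 and a host TOD of 0x20, the epoch becomes
 * 0x10 - 0x20 = 0xfffffffffffffff0, i.e. a negative offset. Because the
 * subtraction wrapped while gtod->tod is numerically smaller than the
 * result, the epoch index (epdx) is decremented to carry the borrow
 * into the extended, multiple-epoch TOD.
 */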
3487 * kvm_arch_fault_in_page - fault-in guest page if necessary
3488 * @vcpu: The corresponding virtual cpu
3489 * @gpa: Guest physical address
3490 * @writable: Whether the page should be writable or not
3492 * Make sure that a guest page has been faulted-in on the host.
3494 * Return: Zero on success, negative error code otherwise.
3496 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3498 return gmap_fault(vcpu->arch.gmap, gpa,
3499 writable ? FAULT_FLAG_WRITE : 0);
3502 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3503 unsigned long token)
3505 struct kvm_s390_interrupt inti;
3506 struct kvm_s390_irq irq;
3509 irq.u.ext.ext_params2 = token;
3510 irq.type = KVM_S390_INT_PFAULT_INIT;
3511 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3513 inti.type = KVM_S390_INT_PFAULT_DONE;
3514 inti.parm64 = token;
3515 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3519 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3520 struct kvm_async_pf *work)
3522 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3523 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3526 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3527 struct kvm_async_pf *work)
3529 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3530 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3533 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3534 struct kvm_async_pf *work)
3536 /* s390 will always inject the page directly */
3539 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3542 * s390 will always inject the page directly,
3543 * but we still want check_async_completion to clean up
3548 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3551 struct kvm_arch_async_pf arch;
3554 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3556 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3557 vcpu->arch.pfault_compare)
3559 if (psw_extint_disabled(vcpu))
3561 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3563 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3565 if (!vcpu->arch.gmap->pfault_enabled)
3568 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3569 hva += current->thread.gmap_addr & ~PAGE_MASK;
3570 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3573 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3577 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3582 * On s390 notifications for arriving pages will be delivered directly
3583 * to the guest but the housekeeping for completed pfaults is
3584 * handled outside the worker.
3586 kvm_check_async_pf_completion(vcpu);
3588 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3589 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3594 if (test_cpu_flag(CIF_MCCK_PENDING))
3597 if (!kvm_is_ucontrol(vcpu->kvm)) {
3598 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3603 rc = kvm_s390_handle_requests(vcpu);
3607 if (guestdbg_enabled(vcpu)) {
3608 kvm_s390_backup_guest_per_regs(vcpu);
3609 kvm_s390_patch_guest_per_regs(vcpu);
3612 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3614 vcpu->arch.sie_block->icptcode = 0;
3615 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3616 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3617 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3622 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3624 struct kvm_s390_pgm_info pgm_info = {
3625 .code = PGM_ADDRESSING,
3630 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3631 trace_kvm_s390_sie_fault(vcpu);
3634 * We want to inject an addressing exception, which is defined as a
3635 * suppressing or terminating exception. However, since we came here
3636 * by a DAT access exception, the PSW still points to the faulting
3637 * instruction since DAT exceptions are nullifying. So we've got
3638 * to look up the current opcode to get the length of the instruction
3639 * to be able to forward the PSW.
3641 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3642 ilen = insn_length(opcode);
3646 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3647 * Forward by arbitrary ilc, injection will take care of
3648 * nullification if necessary.
3650 pgm_info = vcpu->arch.pgm;
3653 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3654 kvm_s390_forward_psw(vcpu, ilen);
3655 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3658 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3660 struct mcck_volatile_info *mcck_info;
3661 struct sie_page *sie_page;
3663 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3664 vcpu->arch.sie_block->icptcode);
3665 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3667 if (guestdbg_enabled(vcpu))
3668 kvm_s390_restore_guest_per_regs(vcpu);
3670 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3671 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3673 if (exit_reason == -EINTR) {
3674 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3675 sie_page = container_of(vcpu->arch.sie_block,
3676 struct sie_page, sie_block);
3677 mcck_info = &sie_page->mcck_info;
3678 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3682 if (vcpu->arch.sie_block->icptcode > 0) {
3683 int rc = kvm_handle_sie_intercept(vcpu);
3685 if (rc != -EOPNOTSUPP)
3687 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3688 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3689 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3690 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3692 } else if (exit_reason != -EFAULT) {
3693 vcpu->stat.exit_null++;
3695 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3696 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3697 vcpu->run->s390_ucontrol.trans_exc_code =
3698 current->thread.gmap_addr;
3699 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3701 } else if (current->thread.gmap_pfault) {
3702 trace_kvm_s390_major_guest_pfault(vcpu);
3703 current->thread.gmap_pfault = 0;
3704 if (kvm_arch_setup_async_pf(vcpu))
3706 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3708 return vcpu_post_run_fault_in_sie(vcpu);
3711 static int __vcpu_run(struct kvm_vcpu *vcpu)
3713 int rc, exit_reason;
3716 * We try to hold kvm->srcu during most of vcpu_run (except when
3717 * running the guest), so that memslots (and other stuff) are protected
3719 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3722 rc = vcpu_pre_run(vcpu);
3726 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3728 * As PF_VCPU will be used in the fault handler, there must be
3729 * no uaccess between guest_enter and guest_exit.
3731 local_irq_disable();
3732 guest_enter_irqoff();
3733 __disable_cpu_timer_accounting(vcpu);
3735 exit_reason = sie64a(vcpu->arch.sie_block,
3736 vcpu->run->s.regs.gprs);
3737 local_irq_disable();
3738 __enable_cpu_timer_accounting(vcpu);
3739 guest_exit_irqoff();
3741 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3743 rc = vcpu_post_run(vcpu, exit_reason);
3744 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3746 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3750 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3752 struct runtime_instr_cb *riccb;
3755 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3756 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3757 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3758 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3759 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3760 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3761 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3762 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3763 /* some control register changes require a tlb flush */
3764 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3766 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3767 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3768 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3769 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3770 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3771 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3773 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3774 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3775 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3776 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3777 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3778 kvm_clear_async_pf_completion_queue(vcpu);
3781 * If userspace sets the riccb (e.g. after migration) to a valid state,
3782 * we should enable RI here instead of doing the lazy enablement.
3784 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3785 test_kvm_facility(vcpu->kvm, 64) &&
3787 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3788 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3789 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3792 * If userspace sets the gscb (e.g. after migration) to non-zero,
3793 * we should enable GS here instead of doing the lazy enablement.
3795 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3796 test_kvm_facility(vcpu->kvm, 133) &&
3798 !vcpu->arch.gs_enabled) {
3799 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3800 vcpu->arch.sie_block->ecb |= ECB_GS;
3801 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3802 vcpu->arch.gs_enabled = 1;
3804 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3805 test_kvm_facility(vcpu->kvm, 82)) {
3806 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3807 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3809 save_access_regs(vcpu->arch.host_acrs);
3810 restore_access_regs(vcpu->run->s.regs.acrs);
3811 /* save host (userspace) fprs/vrs */
3813 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3814 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3816 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3818 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3819 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3820 if (test_fp_ctl(current->thread.fpu.fpc))
3821 /* User space provided an invalid FPC, let's clear it */
3822 current->thread.fpu.fpc = 0;
3823 if (MACHINE_HAS_GS) {
3825 __ctl_set_bit(2, 4);
3826 if (current->thread.gs_cb) {
3827 vcpu->arch.host_gscb = current->thread.gs_cb;
3828 save_gs_cb(vcpu->arch.host_gscb);
3830 if (vcpu->arch.gs_enabled) {
3831 current->thread.gs_cb = (struct gs_cb *)
3832 &vcpu->run->s.regs.gscb;
3833 restore_gs_cb(current->thread.gs_cb);
3837 /* SIE will load etoken directly from SDNX and therefore kvm_run */
3839 kvm_run->kvm_dirty_regs = 0;
3842 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3844 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3845 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3846 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3847 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3848 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3849 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3850 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3851 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3852 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3853 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3854 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3855 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3856 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3857 save_access_regs(vcpu->run->s.regs.acrs);
3858 restore_access_regs(vcpu->arch.host_acrs);
3859 /* Save guest register state */
3861 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3862 /* Restore will be done lazily at return */
3863 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3864 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3865 if (MACHINE_HAS_GS) {
3866 __ctl_set_bit(2, 4);
3867 if (vcpu->arch.gs_enabled)
3868 save_gs_cb(current->thread.gs_cb);
3870 current->thread.gs_cb = vcpu->arch.host_gscb;
3871 restore_gs_cb(vcpu->arch.host_gscb);
3873 if (!vcpu->arch.host_gscb)
3874 __ctl_clear_bit(2, 4);
3875 vcpu->arch.host_gscb = NULL;
3877 /* SIE will save etoken directly into SDNX and therefore kvm_run */
3880 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3884 if (kvm_run->immediate_exit)
3889 if (guestdbg_exit_pending(vcpu)) {
3890 kvm_s390_prepare_debug_exit(vcpu);
3895 kvm_sigset_activate(vcpu);
3897 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3898 kvm_s390_vcpu_start(vcpu);
3899 } else if (is_vcpu_stopped(vcpu)) {
3900 pr_err_ratelimited("can't run stopped vcpu %d\n",
3906 sync_regs(vcpu, kvm_run);
3907 enable_cpu_timer_accounting(vcpu);
3910 rc = __vcpu_run(vcpu);
3912 if (signal_pending(current) && !rc) {
3913 kvm_run->exit_reason = KVM_EXIT_INTR;
3917 if (guestdbg_exit_pending(vcpu) && !rc) {
3918 kvm_s390_prepare_debug_exit(vcpu);
3922 if (rc == -EREMOTE) {
3923 /* userspace support is needed, kvm_run has been prepared */
3927 disable_cpu_timer_accounting(vcpu);
3928 store_regs(vcpu, kvm_run);
3930 kvm_sigset_deactivate(vcpu);
3932 vcpu->stat.exit_userspace++;
3939 * store status at address
3940 * we have two special cases:
3941 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3942 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3944 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3946 unsigned char archmode = 1;
3947 freg_t fprs[NUM_FPRS];
3952 px = kvm_s390_get_prefix(vcpu);
3953 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3954 if (write_guest_abs(vcpu, 163, &archmode, 1))
3957 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3958 if (write_guest_real(vcpu, 163, &archmode, 1))
3962 gpa -= __LC_FPREGS_SAVE_AREA;
3964 /* manually convert vector registers if necessary */
3965 if (MACHINE_HAS_VX) {
3966 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3967 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3970 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3971 vcpu->run->s.regs.fprs, 128);
3973 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3974 vcpu->run->s.regs.gprs, 128);
3975 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3976 &vcpu->arch.sie_block->gpsw, 16);
3977 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3979 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3980 &vcpu->run->s.regs.fpc, 4);
3981 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3982 &vcpu->arch.sie_block->todpr, 4);
3983 cputm = kvm_s390_get_cpu_timer(vcpu);
3984 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3986 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3987 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3989 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3990 &vcpu->run->s.regs.acrs, 64);
3991 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3992 &vcpu->arch.sie_block->gcr, 128);
3993 return rc ? -EFAULT : 0;
3996 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3999 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4000 * switch in the run ioctl. Let's update our copies before we save
4001 * them into the save area.
4004 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4005 save_access_regs(vcpu->run->s.regs.acrs);
4007 return kvm_s390_store_status_unloaded(vcpu, addr);
4010 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4012 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4013 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4016 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4019 struct kvm_vcpu *vcpu;
4021 kvm_for_each_vcpu(i, vcpu, kvm) {
4022 __disable_ibs_on_vcpu(vcpu);
4026 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4030 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4031 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4034 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4036 int i, online_vcpus, started_vcpus = 0;
4038 if (!is_vcpu_stopped(vcpu))
4041 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4042 /* Only one cpu at a time may enter/leave the STOPPED state. */
4043 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4044 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4046 for (i = 0; i < online_vcpus; i++) {
4047 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4051 if (started_vcpus == 0) {
4052 /* we're the only active VCPU -> speed it up */
4053 __enable_ibs_on_vcpu(vcpu);
4054 } else if (started_vcpus == 1) {
4056 * As we are starting a second VCPU, we have to disable
4057 * the IBS facility on all VCPUs to remove potentially
4058 * outstanding ENABLE requests.
4060 __disable_ibs_on_all_vcpus(vcpu->kvm);
4063 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4065 * Another VCPU might have used IBS while we were offline.
4066 * Let's play safe and flush the VCPU at startup.
4068 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4069 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4073 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4075 int i, online_vcpus, started_vcpus = 0;
4076 struct kvm_vcpu *started_vcpu = NULL;
4078 if (is_vcpu_stopped(vcpu))
4081 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4082 /* Only one cpu at a time may enter/leave the STOPPED state. */
4083 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4084 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4086 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4087 kvm_s390_clear_stop_irq(vcpu);
4089 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4090 __disable_ibs_on_vcpu(vcpu);
4092 for (i = 0; i < online_vcpus; i++) {
4093 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4095 started_vcpu = vcpu->kvm->vcpus[i];
4099 if (started_vcpus == 1) {
4101 * As we only have one VCPU left, we want to enable the
4102 * IBS facility for that VCPU to speed it up.
4104 __enable_ibs_on_vcpu(started_vcpu);
4107 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4111 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4112 struct kvm_enable_cap *cap)
4120 case KVM_CAP_S390_CSS_SUPPORT:
4121 if (!vcpu->kvm->arch.css_support) {
4122 vcpu->kvm->arch.css_support = 1;
4123 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4124 trace_kvm_s390_enable_css(vcpu->kvm);
4135 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4136 struct kvm_s390_mem_op *mop)
4138 void __user *uaddr = (void __user *)mop->buf;
4139 void *tmpbuf = NULL;
4141 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4142 | KVM_S390_MEMOP_F_CHECK_ONLY;
4144 if (mop->flags & ~supported_flags)
4147 if (mop->size > MEM_OP_MAX_SIZE)
4150 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4151 tmpbuf = vmalloc(mop->size);
4156 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4159 case KVM_S390_MEMOP_LOGICAL_READ:
4160 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4161 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4162 mop->size, GACC_FETCH);
4165 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4167 if (copy_to_user(uaddr, tmpbuf, mop->size))
4171 case KVM_S390_MEMOP_LOGICAL_WRITE:
4172 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4173 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4174 mop->size, GACC_STORE);
4177 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4181 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4187 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4189 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4190 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
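/*
 * Illustrative userspace sketch (not part of this file) of the mem_op
 * handler above: read "len" bytes from guest logical address "gaddr"
 * through access register 0. "vcpu_fd" is a hypothetical vcpu file
 * descriptor.
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int read_guest_mem(int vcpu_fd, uint64_t gaddr,
 *				  void *buf, uint32_t len)
 *	{
 *		struct kvm_s390_mem_op op = {
 *			.gaddr = gaddr,
 *			.size = len,
 *			.op = KVM_S390_MEMOP_LOGICAL_READ,
 *			.buf = (uint64_t)(uintptr_t)buf,
 *			.ar = 0,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *	}
 */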
4196 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4197 unsigned int ioctl, unsigned long arg)
4199 struct kvm_vcpu *vcpu = filp->private_data;
4200 void __user *argp = (void __user *)arg;
4203 case KVM_S390_IRQ: {
4204 struct kvm_s390_irq s390irq;
4206 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4208 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4210 case KVM_S390_INTERRUPT: {
4211 struct kvm_s390_interrupt s390int;
4212 struct kvm_s390_irq s390irq;
4214 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4216 if (s390int_to_s390irq(&s390int, &s390irq))
4218 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4221 return -ENOIOCTLCMD;
4224 long kvm_arch_vcpu_ioctl(struct file *filp,
4225 unsigned int ioctl, unsigned long arg)
4227 struct kvm_vcpu *vcpu = filp->private_data;
4228 void __user *argp = (void __user *)arg;
4235 case KVM_S390_STORE_STATUS:
4236 idx = srcu_read_lock(&vcpu->kvm->srcu);
4237 r = kvm_s390_vcpu_store_status(vcpu, arg);
4238 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4240 case KVM_S390_SET_INITIAL_PSW: {
4244 if (copy_from_user(&psw, argp, sizeof(psw)))
4246 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4249 case KVM_S390_INITIAL_RESET:
4250 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4252 case KVM_SET_ONE_REG:
4253 case KVM_GET_ONE_REG: {
4254 struct kvm_one_reg reg;
4256 if (copy_from_user(&reg, argp, sizeof(reg)))
4258 if (ioctl == KVM_SET_ONE_REG)
4259 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4261 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4264 #ifdef CONFIG_KVM_S390_UCONTROL
4265 case KVM_S390_UCAS_MAP: {
4266 struct kvm_s390_ucas_mapping ucasmap;
4268 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4273 if (!kvm_is_ucontrol(vcpu->kvm)) {
4278 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4279 ucasmap.vcpu_addr, ucasmap.length);
4282 case KVM_S390_UCAS_UNMAP: {
4283 struct kvm_s390_ucas_mapping ucasmap;
4285 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4290 if (!kvm_is_ucontrol(vcpu->kvm)) {
4295 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4300 case KVM_S390_VCPU_FAULT: {
4301 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4304 case KVM_ENABLE_CAP:
4306 struct kvm_enable_cap cap;
4308 if (copy_from_user(&cap, argp, sizeof(cap)))
4310 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4313 case KVM_S390_MEM_OP: {
4314 struct kvm_s390_mem_op mem_op;
4316 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4317 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4322 case KVM_S390_SET_IRQ_STATE: {
4323 struct kvm_s390_irq_state irq_state;
4326 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4328 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4329 irq_state.len == 0 ||
4330 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4334 /* do not use irq_state.flags, it will break old QEMUs */
4335 r = kvm_s390_set_irq_state(vcpu,
4336 (void __user *) irq_state.buf,
4340 case KVM_S390_GET_IRQ_STATE: {
4341 struct kvm_s390_irq_state irq_state;
4344 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4346 if (irq_state.len == 0) {
4350 /* do not use irq_state.flags, it will break old QEMUs */
4351 r = kvm_s390_get_irq_state(vcpu,
4352 (__u8 __user *) irq_state.buf,
4364 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4366 #ifdef CONFIG_KVM_S390_UCONTROL
4367 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4368 && (kvm_is_ucontrol(vcpu->kvm))) {
4369 vmf->page = virt_to_page(vcpu->arch.sie_block);
4370 get_page(vmf->page);
4374 return VM_FAULT_SIGBUS;
4377 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4378 unsigned long npages)
4383 /* Section: memory related */
4384 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4385 struct kvm_memory_slot *memslot,
4386 const struct kvm_userspace_memory_region *mem,
4387 enum kvm_mr_change change)
4389 /* A few sanity checks. Memory slots have to start and end at a
4390 segment boundary (1MB). The memory in userland may be fragmented
4391 across various different vmas, and it is okay to mmap() and munmap()
4392 parts of this slot at any time after this call. */
4394 if (mem->userspace_addr & 0xffffful)
4397 if (mem->memory_size & 0xffffful)
4400 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4406 void kvm_arch_commit_memory_region(struct kvm *kvm,
4407 const struct kvm_userspace_memory_region *mem,
4408 const struct kvm_memory_slot *old,
4409 const struct kvm_memory_slot *new,
4410 enum kvm_mr_change change)
4414 /* If the basics of the memslot do not change, we do not want
4415 * to update the gmap. Every update causes several unnecessary
4416 * segment translation exceptions. This is usually handled just
4417 * fine by the normal fault handler + gmap, but it will also
4418 * cause faults on the prefix page of running guest CPUs.
4420 if (old->userspace_addr == mem->userspace_addr &&
4421 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4422 old->npages * PAGE_SIZE == mem->memory_size)
4425 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4426 mem->guest_phys_addr, mem->memory_size);
4428 pr_warn("failed to commit memory region\n");
4432 static inline unsigned long nonhyp_mask(int i)
4434 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4436 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
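/*
 * Worked example for nonhyp_mask() above: each 2-bit field of sclp.hmfai
 * says how much of the corresponding facility-list doubleword remains
 * visible to guests. For i = 0 and hmfai = 0x40000000 (first field = 1),
 * nonhyp_fai is 1 and the mask is 0x0000ffffffffffff >> 16 =
 * 0x00000000ffffffff, so only the low-order 32 bits of facility
 * doubleword 0 pass through.
 */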
4439 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4441 vcpu->valid_wakeup = false;
4444 static int __init kvm_s390_init(void)
4448 if (!sclp.has_sief2) {
4449 pr_info("SIE is not available\n");
4453 if (nested && hpage) {
4454 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4458 for (i = 0; i < 16; i++)
4459 kvm_s390_fac_base[i] |=
4460 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4462 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4465 static void __exit kvm_s390_exit(void)
4470 module_init(kvm_s390_init);
4471 module_exit(kvm_s390_exit);
4474 * Enable autoloading of the kvm module.
4475 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4476 * since x86 takes a different approach.
4478 #include <linux/miscdevice.h>
4479 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4480 MODULE_ALIAS("devname:kvm");