// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
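
/*
 * Worst-case sizing sketch (the concrete numbers depend on the uapi headers
 * in use): assuming KVM_MAX_VCPUS == 255, LOCAL_IRQS == 32 and
 * sizeof(struct kvm_s390_irq) == 64, VCPU_IRQS_MAX_BUF works out to
 * (255 + 32) * 64 = 18368 bytes - roughly one slot per kind of pending
 * local interrupt plus one SIGP per potential sender vcpu.
 */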
struct kvm_stats_debugfs_item debugfs_entries[] = {
	VCPU_STAT("userspace_handled", exit_userspace),
	VCPU_STAT("exit_null", exit_null),
	VCPU_STAT("exit_validity", exit_validity),
	VCPU_STAT("exit_stop_request", exit_stop_request),
	VCPU_STAT("exit_external_request", exit_external_request),
	VCPU_STAT("exit_io_request", exit_io_request),
	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
	VCPU_STAT("exit_instruction", exit_instruction),
	VCPU_STAT("exit_pei", exit_pei),
	VCPU_STAT("exit_program_interruption", exit_program_interruption),
	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
	VCPU_STAT("exit_operation_exception", exit_operation_exception),
	VCPU_STAT("halt_successful_poll", halt_successful_poll),
	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
	VCPU_STAT("halt_wakeup", halt_wakeup),
	VCPU_STAT("instruction_lctlg", instruction_lctlg),
	VCPU_STAT("instruction_lctl", instruction_lctl),
	VCPU_STAT("instruction_stctl", instruction_stctl),
	VCPU_STAT("instruction_stctg", instruction_stctg),
	VCPU_STAT("deliver_ckc", deliver_ckc),
	VCPU_STAT("deliver_cputm", deliver_cputm),
	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
	VCPU_STAT("deliver_external_call", deliver_external_call),
	VCPU_STAT("deliver_service_signal", deliver_service_signal),
	VCPU_STAT("deliver_virtio", deliver_virtio),
	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
	VCPU_STAT("deliver_program", deliver_program),
	VCPU_STAT("deliver_io", deliver_io),
	VCPU_STAT("deliver_machine_check", deliver_machine_check),
	VCPU_STAT("exit_wait_state", exit_wait_state),
	VCPU_STAT("inject_ckc", inject_ckc),
	VCPU_STAT("inject_cputm", inject_cputm),
	VCPU_STAT("inject_external_call", inject_external_call),
	VM_STAT("inject_float_mchk", inject_float_mchk),
	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
	VM_STAT("inject_io", inject_io),
	VCPU_STAT("inject_mchk", inject_mchk),
	VM_STAT("inject_pfault_done", inject_pfault_done),
	VCPU_STAT("inject_program", inject_program),
	VCPU_STAT("inject_restart", inject_restart),
	VM_STAT("inject_service_signal", inject_service_signal),
	VCPU_STAT("inject_set_prefix", inject_set_prefix),
	VCPU_STAT("inject_stop_signal", inject_stop_signal),
	VCPU_STAT("inject_pfault_init", inject_pfault_init),
	VM_STAT("inject_virtio", inject_virtio),
	VCPU_STAT("instruction_epsw", instruction_epsw),
	VCPU_STAT("instruction_gs", instruction_gs),
	VCPU_STAT("instruction_io_other", instruction_io_other),
	VCPU_STAT("instruction_lpsw", instruction_lpsw),
	VCPU_STAT("instruction_lpswe", instruction_lpswe),
	VCPU_STAT("instruction_pfmf", instruction_pfmf),
	VCPU_STAT("instruction_ptff", instruction_ptff),
	VCPU_STAT("instruction_stidp", instruction_stidp),
	VCPU_STAT("instruction_sck", instruction_sck),
	VCPU_STAT("instruction_sckpf", instruction_sckpf),
	VCPU_STAT("instruction_spx", instruction_spx),
	VCPU_STAT("instruction_stpx", instruction_stpx),
	VCPU_STAT("instruction_stap", instruction_stap),
	VCPU_STAT("instruction_iske", instruction_iske),
	VCPU_STAT("instruction_ri", instruction_ri),
	VCPU_STAT("instruction_rrbe", instruction_rrbe),
	VCPU_STAT("instruction_sske", instruction_sske),
	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
	VCPU_STAT("instruction_essa", instruction_essa),
	VCPU_STAT("instruction_stsi", instruction_stsi),
	VCPU_STAT("instruction_stfl", instruction_stfl),
	VCPU_STAT("instruction_tb", instruction_tb),
	VCPU_STAT("instruction_tpi", instruction_tpi),
	VCPU_STAT("instruction_tprot", instruction_tprot),
	VCPU_STAT("instruction_tsch", instruction_tsch),
	VCPU_STAT("instruction_sthyi", instruction_sthyi),
	VCPU_STAT("instruction_sie", instruction_sie),
	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
	VCPU_STAT("instruction_diag_10", diagnose_10),
	VCPU_STAT("instruction_diag_44", diagnose_44),
	VCPU_STAT("instruction_diag_9c", diagnose_9c),
	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
	VCPU_STAT("instruction_diag_258", diagnose_258),
	VCPU_STAT("instruction_diag_308", diagnose_308),
	VCPU_STAT("instruction_diag_500", diagnose_500),
	VCPU_STAT("instruction_diag_other", diagnose_other),
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
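
/*
 * Usage sketch: nested and hpage are read-only at runtime (S_IRUGO/0444)
 * and must be given at load time, while the 0644 parameters can be changed
 * on the fly, e.g.:
 *
 *	modprobe kvm nested=1
 *	echo 25 > /sys/module/kvm/parameters/halt_poll_max_steal
 *	echo 0 > /sys/module/kvm/parameters/use_gisa
 */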
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
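
/*
 * Layout sketch: the facility list is a big-endian bit string, so facility
 * bit n lives in byte n / 8 under mask 0x80 >> (n % 8); facility 129
 * (vector), for example, sits in byte 16 under mask 0x40. With
 * SIZE_INTERNAL == 16 doublewords the masks above cover facilities 0-1023.
 */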
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
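
/*
 * Worked example: if the host TOD is stepped forward by delta = 0x100, the
 * epoch above is decreased by 0x100 so that (TOD + epoch), the guest's view
 * of the clock, stays constant. With the multiple-epoch facility (ECD_MEF),
 * epdx extends the epoch to, in effect, a signed 72-bit value, so a borrow
 * out of the 64-bit addition has to be propagated into epdx - that is what
 * the delta_idx sign extension and the (scb->epoch < delta) carry check do.
 */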
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	register unsigned long r0 asm("0") = 0;	/* query function */
	register unsigned long r1 asm("1") = (unsigned long) query;

	asm volatile(
		/* Parameter regs are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: "d" (r0), "a" (r1), [opc] "i" (opcode)
		: "cc", "memory");
}
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
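
/*
 * Userspace view (sketch, error handling omitted): the bits accumulated in
 * kvm_s390_available_cpu_feat above are what a KVM_GET_DEVICE_ATTR call on
 * the VM fd reports back for the machine feature set:
 *
 *	struct kvm_s390_vm_cpu_feat feat;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr = KVM_S390_VM_CPU_MACHINE_FEAT,
 *		.addr = (__u64)&feat,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */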
int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}
void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
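
/*
 * Userspace trigger (sketch, error handling omitted): migration mode is
 * flipped through the VM device attribute interface and ends up here with
 * kvm->slots_lock held, see kvm_s390_vm_set_migration() below:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */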
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
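
/*
 * Arithmetic sketch: on s390x, 2 * sizeof(void *) == 16, so a run of up to
 * 16 clean values is sent inline rather than terminating the block, since
 * starting a fresh block costs a new start_gfn/count header of comparable
 * size in the output stream.
 */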
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (start >= slots->used_slots)
		return slots->used_slots - 1;

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
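
/*
 * Example: with two slots covering gfns [0x0, 0x100) and [0x200, 0x300), a
 * lookup of gfn 0x180 (inside the hole) still terminates and returns the
 * index of one of the two bordering slots; callers such as
 * kvm_s390_next_dirty_cmma() only need a valid starting point, not an exact
 * hit. The lru_slot cache is only updated on an exact hit.
 */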
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
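
/*
 * Encoding note: (pgstev >> 24) moves the PGSTE status byte into place and
 * the 0x43 mask keeps only the usage-state and NODAT bits (the same bits
 * that kvm_s390_set_cmma_bits() below accepts via _PGSTE_GPS_USAGE_MASK |
 * _PGSTE_GPS_NODAT); everything else in the PGSTE is not exposed.
 */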
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	if (unlikely(!slots->used_slots))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
2058 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2059 struct kvm_s390_cmma_log *args)
2061 unsigned long bufsize;
2062 int srcu_idx, peek, ret;
2063 u8 *values;
2065 if (!kvm->arch.use_cmma)
2066 return -ENXIO;
2067 /* Invalid/unsupported flags were specified */
2068 if (args->flags & ~KVM_S390_CMMA_PEEK)
2069 return -EINVAL;
2070 /* Migration mode query, and we are not doing a migration */
2071 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2072 if (!peek && !kvm->arch.migration_mode)
2073 return -EINVAL;
2074 /* CMMA is disabled or was not used, or the buffer has length zero */
2075 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2076 if (!bufsize || !kvm->mm->context.uses_cmm) {
2077 memset(args, 0, sizeof(*args));
2078 return 0;
2079 }
2080 /* We are not peeking, and there are no dirty pages */
2081 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2082 memset(args, 0, sizeof(*args));
2083 return 0;
2084 }
2086 values = vmalloc(bufsize);
2087 if (!values)
2088 return -ENOMEM;
2090 down_read(&kvm->mm->mmap_sem);
2091 srcu_idx = srcu_read_lock(&kvm->srcu);
2092 if (peek)
2093 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2094 else
2095 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2096 srcu_read_unlock(&kvm->srcu, srcu_idx);
2097 up_read(&kvm->mm->mmap_sem);
2099 if (kvm->arch.migration_mode)
2100 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2101 else
2102 args->remaining = 0;
2104 if (copy_to_user((void __user *)args->values, values, args->count))
2105 ret = -EFAULT;
2107 vfree(values);
2108 return ret;
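/*
 * Illustrative userspace loop for KVM_S390_GET_CMMA_BITS (a sketch, not
 * part of this file; vm_fd and buf are assumptions of the example). The
 * kernel updates start_gfn, count and remaining on every call:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = KVM_S390_CMMA_SIZE_MAX,
 *		.flags = 0,				// or KVM_S390_CMMA_PEEK
 *		.values = (__u64)(unsigned long)buf,	// count bytes large
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		// consume log.count values for gfns starting at log.start_gfn
 *		log.start_gfn += log.count;
 *		log.count = KVM_S390_CMMA_SIZE_MAX;
 *	} while (log.remaining);
 */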
2112 * This function sets the CMMA attributes for the given pages. If the input
2113 * buffer has zero length, no action is taken, otherwise the attributes are
2114 * set and the mm->context.uses_cmm flag is set.
2116 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2117 const struct kvm_s390_cmma_log *args)
2119 unsigned long hva, mask, pgstev, i;
2120 uint8_t *bits;
2121 int srcu_idx, r = 0;
2123 mask = args->mask;
2125 if (!kvm->arch.use_cmma)
2126 return -ENXIO;
2127 /* invalid/unsupported flags */
2128 if (args->flags != 0)
2129 return -EINVAL;
2130 /* Enforce sane limit on memory allocation */
2131 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2132 return -EINVAL;
2134 if (args->count == 0)
2135 return 0;
2137 bits = vmalloc(array_size(sizeof(*bits), args->count));
2138 if (!bits)
2139 return -ENOMEM;
2141 r = copy_from_user(bits, (void __user *)args->values, args->count);
2142 if (r) {
2143 r = -EFAULT;
2144 goto out;
2145 }
2147 down_read(&kvm->mm->mmap_sem);
2148 srcu_idx = srcu_read_lock(&kvm->srcu);
2149 for (i = 0; i < args->count; i++) {
2150 hva = gfn_to_hva(kvm, args->start_gfn + i);
2151 if (kvm_is_error_hva(hva)) {
2152 r = -EFAULT;
2153 break;
2154 }
2156 pgstev = bits[i];
2157 pgstev = pgstev << 24;
2158 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2159 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2160 }
2161 srcu_read_unlock(&kvm->srcu, srcu_idx);
2162 up_read(&kvm->mm->mmap_sem);
2164 if (!kvm->mm->context.uses_cmm) {
2165 down_write(&kvm->mm->mmap_sem);
2166 kvm->mm->context.uses_cmm = 1;
2167 up_write(&kvm->mm->mmap_sem);
2168 }
2169 out:
2170 vfree(bits);
2171 return r;
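/*
 * kvm_s390_cpus_from_pv() - convert all vcpus back from protected mode.
 * Failures are not fatal for the loop itself (see the comment below);
 * the rc/rrc pair of the first failing CPU is what gets reported back.
 */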
2174 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2176 struct kvm_vcpu *vcpu;
2177 u16 rc, rrc;
2178 int ret = 0;
2179 int i;
2182 * We ignore failures and try to destroy as many CPUs as possible.
2183 * At the same time we must not free the assigned resources when
2184 * this fails, as the ultravisor has still access to that memory.
2185 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2187 * We want to return the first failure rc and rrc, though.
2189 kvm_for_each_vcpu(i, vcpu, kvm) {
2190 mutex_lock(&vcpu->mutex);
2191 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2192 *rcp = rc;
2193 *rrcp = rrc;
2194 ret = -EIO;
2195 }
2196 mutex_unlock(&vcpu->mutex);
2197 }
2198 return ret;
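/*
 * kvm_s390_cpus_to_pv() - create the ultravisor part of every vcpu. If
 * any CPU fails to convert, the ones already converted are rolled back
 * via kvm_s390_cpus_from_pv() and the error is returned.
 */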
2201 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2203 int i, r = 0;
2204 u16 dummy;
2206 struct kvm_vcpu *vcpu;
2208 kvm_for_each_vcpu(i, vcpu, kvm) {
2209 mutex_lock(&vcpu->mutex);
2210 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2211 mutex_unlock(&vcpu->mutex);
2212 if (r)
2213 break;
2214 }
2215 if (r)
2216 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2217 return r;
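/*
 * kvm_s390_handle_pv() - dispatch the KVM_S390_PV_COMMAND subcommands.
 * Called with kvm->lock held (see kvm_arch_vm_ioctl() below); cmd->rc
 * and cmd->rrc carry the ultravisor return codes back to userspace.
 */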
2220 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 void __user *argp = (void __user *)cmd->data;
2227 case KVM_PV_ENABLE: {
2229 if (kvm_s390_pv_is_protected(kvm))
2233 * FMT 4 SIE needs esca. As we never switch back to bsca from
2234 * esca, we need no cleanup in the error cases below
2236 r = sca_switch_to_extended(kvm);
2240 down_write(&current->mm->mmap_sem);
2241 r = gmap_mark_unmergeable();
2242 up_write(&current->mm->mmap_sem);
2246 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2252 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2254 /* we need to block service interrupts from now on */
2255 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2258 case KVM_PV_DISABLE: {
2260 if (!kvm_s390_pv_is_protected(kvm))
2263 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2265 * If a CPU could not be destroyed, destroy VM will also fail.
2266 * There is no point in trying to destroy it. Instead return
2267 * the rc and rrc from the first CPU that failed destroying.
2271 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2273 /* no need to block service interrupts any more */
2274 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2277 case KVM_PV_SET_SEC_PARMS: {
2278 struct kvm_s390_pv_sec_parm parms = {};
2282 if (!kvm_s390_pv_is_protected(kvm))
2286 if (copy_from_user(&parms, argp, sizeof(parms)))
2289 /* Currently restricted to 8KB */
2291 if (parms.length > PAGE_SIZE * 2)
2295 hdr = vmalloc(parms.length);
2300 if (!copy_from_user(hdr, (void __user *)parms.origin,
2302 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2303 &cmd->rc, &cmd->rrc);
2308 case KVM_PV_UNPACK: {
2309 struct kvm_s390_pv_unp unp = {};
2312 if (!kvm_s390_pv_is_protected(kvm))
2316 if (copy_from_user(&unp, argp, sizeof(unp)))
2319 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2320 &cmd->rc, &cmd->rrc);
2323 case KVM_PV_VERIFY: {
2325 if (!kvm_s390_pv_is_protected(kvm))
2328 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2329 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2330 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2331 cmd->rrc);
2332 break;
2334 case KVM_PV_PREP_RESET: {
2336 if (!kvm_s390_pv_is_protected(kvm))
2339 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2340 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2341 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2342 cmd->rc, cmd->rrc);
2343 break;
2345 case KVM_PV_UNSHARE_ALL: {
2347 if (!kvm_s390_pv_is_protected(kvm))
2350 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2352 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2353 cmd->rc, cmd->rrc);
2354 break;
2355 }
2356 default:
2357 r = -ENOTTY;
2358 }
2360 return r;
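/*
 * For orientation, the expected userspace ordering of the subcommands
 * handled above (a sketch; vm_fd is an assumption of the example):
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // donate memory, convert cpus
 *	// then KVM_PV_SET_SEC_PARMS, KVM_PV_UNPACK and KVM_PV_VERIFY to
 *	// stage and check the encrypted boot image, and eventually:
 *	cmd.cmd = KVM_PV_DISABLE;
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // convert back, free handle
 */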
2362 long kvm_arch_vm_ioctl(struct file *filp,
2363 unsigned int ioctl, unsigned long arg)
2365 struct kvm *kvm = filp->private_data;
2366 void __user *argp = (void __user *)arg;
2367 struct kvm_device_attr attr;
2371 case KVM_S390_INTERRUPT: {
2372 struct kvm_s390_interrupt s390int;
2375 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2377 r = kvm_s390_inject_vm(kvm, &s390int);
2380 case KVM_CREATE_IRQCHIP: {
2381 struct kvm_irq_routing_entry routing;
2384 if (kvm->arch.use_irqchip) {
2385 /* Set up dummy routing. */
2386 memset(&routing, 0, sizeof(routing));
2387 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2391 case KVM_SET_DEVICE_ATTR: {
2393 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2395 r = kvm_s390_vm_set_attr(kvm, &attr);
2398 case KVM_GET_DEVICE_ATTR: {
2400 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2402 r = kvm_s390_vm_get_attr(kvm, &attr);
2405 case KVM_HAS_DEVICE_ATTR: {
2407 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2409 r = kvm_s390_vm_has_attr(kvm, &attr);
2412 case KVM_S390_GET_SKEYS: {
2413 struct kvm_s390_skeys args;
2416 if (copy_from_user(&args, argp,
2417 sizeof(struct kvm_s390_skeys)))
2419 r = kvm_s390_get_skeys(kvm, &args);
2422 case KVM_S390_SET_SKEYS: {
2423 struct kvm_s390_skeys args;
2426 if (copy_from_user(&args, argp,
2427 sizeof(struct kvm_s390_skeys)))
2429 r = kvm_s390_set_skeys(kvm, &args);
2432 case KVM_S390_GET_CMMA_BITS: {
2433 struct kvm_s390_cmma_log args;
2436 if (copy_from_user(&args, argp, sizeof(args)))
2438 mutex_lock(&kvm->slots_lock);
2439 r = kvm_s390_get_cmma_bits(kvm, &args);
2440 mutex_unlock(&kvm->slots_lock);
2442 r = copy_to_user(argp, &args, sizeof(args));
2448 case KVM_S390_SET_CMMA_BITS: {
2449 struct kvm_s390_cmma_log args;
2452 if (copy_from_user(&args, argp, sizeof(args)))
2454 mutex_lock(&kvm->slots_lock);
2455 r = kvm_s390_set_cmma_bits(kvm, &args);
2456 mutex_unlock(&kvm->slots_lock);
2459 case KVM_S390_PV_COMMAND: {
2460 struct kvm_pv_cmd args;
2462 /* protvirt means user sigp */
2463 kvm->arch.user_cpu_state_ctrl = 1;
2465 if (!is_prot_virt_host()) {
2469 if (copy_from_user(&args, argp, sizeof(args))) {
2477 mutex_lock(&kvm->lock);
2478 r = kvm_s390_handle_pv(kvm, &args);
2479 mutex_unlock(&kvm->lock);
2480 if (copy_to_user(argp, &args, sizeof(args))) {
2493 static int kvm_s390_apxa_installed(void)
2495 struct ap_config_info info;
2497 if (ap_instructions_available()) {
2498 if (ap_qci(&info) == 0)
2499 return info.apxa;
2500 }
2502 return 0;
2506 * The format of the crypto control block (CRYCB) is specified in the 3 low
2507 * order bits of the CRYCB designation (CRYCBD) field as follows:
2508 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2509 * AP extended addressing (APXA) facility are installed.
2510 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2511 * Format 2: Both the APXA and MSAX3 facilities are installed
2513 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2515 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2517 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2518 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2520 /* Check whether MSAX3 is installed */
2521 if (!test_kvm_facility(kvm, 76))
2522 return;
2524 if (kvm_s390_apxa_installed())
2525 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2526 else
2527 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2530 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2531 unsigned long *aqm, unsigned long *adm)
2533 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2535 mutex_lock(&kvm->lock);
2536 kvm_s390_vcpu_block_all(kvm);
2538 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2539 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2540 memcpy(crycb->apcb1.apm, apm, 32);
2541 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2542 apm[0], apm[1], apm[2], apm[3]);
2543 memcpy(crycb->apcb1.aqm, aqm, 32);
2544 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2545 aqm[0], aqm[1], aqm[2], aqm[3]);
2546 memcpy(crycb->apcb1.adm, adm, 32);
2547 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2548 adm[0], adm[1], adm[2], adm[3]);
2549 break;
2550 case CRYCB_FORMAT1:
2551 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2552 memcpy(crycb->apcb0.apm, apm, 8);
2553 memcpy(crycb->apcb0.aqm, aqm, 2);
2554 memcpy(crycb->apcb0.adm, adm, 2);
2555 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2556 apm[0], *((unsigned short *)aqm),
2557 *((unsigned short *)adm));
2558 break;
2559 default: /* Cannot happen */
2560 break;
2561 }
2563 /* recreate the shadow crycb for each vcpu */
2564 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2565 kvm_s390_vcpu_unblock_all(kvm);
2566 mutex_unlock(&kvm->lock);
2568 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2570 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2572 mutex_lock(&kvm->lock);
2573 kvm_s390_vcpu_block_all(kvm);
2575 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2576 sizeof(kvm->arch.crypto.crycb->apcb0));
2577 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2578 sizeof(kvm->arch.crypto.crycb->apcb1));
2580 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2581 /* recreate the shadow crycb for each vcpu */
2582 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2583 kvm_s390_vcpu_unblock_all(kvm);
2584 mutex_unlock(&kvm->lock);
2586 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2588 static u64 kvm_s390_get_initial_cpuid(void)
2590 struct cpuid cpuid;
2592 get_cpu_id(&cpuid);
2593 cpuid.version = 0xff;
2594 return *((u64 *) &cpuid);
2597 static void kvm_s390_crypto_init(struct kvm *kvm)
2599 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2600 kvm_s390_set_crycb_format(kvm);
2602 if (!test_kvm_facility(kvm, 76))
2605 /* Enable AES/DEA protected key functions by default */
2606 kvm->arch.crypto.aes_kw = 1;
2607 kvm->arch.crypto.dea_kw = 1;
2608 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2609 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2610 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2611 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2614 static void sca_dispose(struct kvm *kvm)
2616 if (kvm->arch.use_esca)
2617 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2618 else
2619 free_page((unsigned long)(kvm->arch.sca));
2620 kvm->arch.sca = NULL;
2623 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2625 gfp_t alloc_flags = GFP_KERNEL;
2627 char debug_name[16];
2628 static unsigned long sca_offset;
2631 #ifdef CONFIG_KVM_S390_UCONTROL
2632 if (type & ~KVM_VM_S390_UCONTROL)
2633 return -EINVAL;
2634 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2635 return -EPERM;
2641 rc = s390_enable_sie();
2647 if (!sclp.has_64bscao)
2648 alloc_flags |= GFP_DMA;
2649 rwlock_init(&kvm->arch.sca_lock);
2650 /* start with basic SCA */
2651 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2654 mutex_lock(&kvm_lock);
2655 sca_offset += 16;
2656 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2657 sca_offset = 0;
2658 kvm->arch.sca = (struct bsca_block *)
2659 ((char *) kvm->arch.sca + sca_offset);
2660 mutex_unlock(&kvm_lock);
2662 sprintf(debug_name, "kvm-%u", current->pid);
2664 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2668 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2669 kvm->arch.sie_page2 =
2670 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2671 if (!kvm->arch.sie_page2)
2674 kvm->arch.sie_page2->kvm = kvm;
2675 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2677 for (i = 0; i < kvm_s390_fac_size(); i++) {
2678 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2679 (kvm_s390_fac_base[i] |
2680 kvm_s390_fac_ext[i]);
2681 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2682 kvm_s390_fac_base[i];
2684 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2686 /* we are always in czam mode - even on pre z14 machines */
2687 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2688 set_kvm_facility(kvm->arch.model.fac_list, 138);
2689 /* we emulate STHYI in kvm */
2690 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2691 set_kvm_facility(kvm->arch.model.fac_list, 74);
2692 if (MACHINE_HAS_TLB_GUEST) {
2693 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2694 set_kvm_facility(kvm->arch.model.fac_list, 147);
2697 if (css_general_characteristics.aiv && test_facility(65))
2698 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2700 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2701 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2703 kvm_s390_crypto_init(kvm);
2705 mutex_init(&kvm->arch.float_int.ais_lock);
2706 spin_lock_init(&kvm->arch.float_int.lock);
2707 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2708 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2709 init_waitqueue_head(&kvm->arch.ipte_wq);
2710 mutex_init(&kvm->arch.ipte_mutex);
2712 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2713 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2715 if (type & KVM_VM_S390_UCONTROL) {
2716 kvm->arch.gmap = NULL;
2717 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2719 if (sclp.hamax == U64_MAX)
2720 kvm->arch.mem_limit = TASK_SIZE_MAX;
2721 else
2722 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2723 sclp.hamax + 1);
2724 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2725 if (!kvm->arch.gmap)
2727 kvm->arch.gmap->private = kvm;
2728 kvm->arch.gmap->pfault_enabled = 0;
2731 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2732 kvm->arch.use_skf = sclp.has_skey;
2733 spin_lock_init(&kvm->arch.start_stop_lock);
2734 kvm_s390_vsie_init(kvm);
2736 kvm_s390_gisa_init(kvm);
2737 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2741 free_page((unsigned long)kvm->arch.sie_page2);
2742 debug_unregister(kvm->arch.dbf);
2744 KVM_EVENT(3, "creation of vm failed: %d", rc);
2748 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2752 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2753 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2754 kvm_s390_clear_local_irqs(vcpu);
2755 kvm_clear_async_pf_completion_queue(vcpu);
2756 if (!kvm_is_ucontrol(vcpu->kvm))
2757 sca_del_vcpu(vcpu);
2759 if (kvm_is_ucontrol(vcpu->kvm))
2760 gmap_remove(vcpu->arch.gmap);
2762 if (vcpu->kvm->arch.use_cmma)
2763 kvm_s390_vcpu_unsetup_cmma(vcpu);
2764 /* We cannot hold the vcpu mutex here; we are already dying */
2765 if (kvm_s390_pv_cpu_get_handle(vcpu))
2766 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2767 free_page((unsigned long)(vcpu->arch.sie_block));
2770 static void kvm_free_vcpus(struct kvm *kvm)
2773 struct kvm_vcpu *vcpu;
2775 kvm_for_each_vcpu(i, vcpu, kvm)
2776 kvm_vcpu_destroy(vcpu);
2778 mutex_lock(&kvm->lock);
2779 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2780 kvm->vcpus[i] = NULL;
2782 atomic_set(&kvm->online_vcpus, 0);
2783 mutex_unlock(&kvm->lock);
2786 void kvm_arch_destroy_vm(struct kvm *kvm)
2790 kvm_free_vcpus(kvm);
2792 kvm_s390_gisa_destroy(kvm);
2794 * We are already at the end of life and kvm->lock is not taken.
2795 * This is ok as the file descriptor is closed by now and nobody
2796 * can mess with the pv state. To avoid lockdep_assert_held from
2797 * complaining we do not use kvm_s390_pv_is_protected.
2799 if (kvm_s390_pv_get_handle(kvm))
2800 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2801 debug_unregister(kvm->arch.dbf);
2802 free_page((unsigned long)kvm->arch.sie_page2);
2803 if (!kvm_is_ucontrol(kvm))
2804 gmap_remove(kvm->arch.gmap);
2805 kvm_s390_destroy_adapters(kvm);
2806 kvm_s390_clear_float_irqs(kvm);
2807 kvm_s390_vsie_destroy(kvm);
2808 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2811 /* Section: vcpu related */
2812 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2814 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2815 if (!vcpu->arch.gmap)
2817 vcpu->arch.gmap->private = vcpu->kvm;
2822 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2824 if (!kvm_s390_use_sca_entries())
2826 read_lock(&vcpu->kvm->arch.sca_lock);
2827 if (vcpu->kvm->arch.use_esca) {
2828 struct esca_block *sca = vcpu->kvm->arch.sca;
2830 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2831 sca->cpu[vcpu->vcpu_id].sda = 0;
2833 struct bsca_block *sca = vcpu->kvm->arch.sca;
2835 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2836 sca->cpu[vcpu->vcpu_id].sda = 0;
2838 read_unlock(&vcpu->kvm->arch.sca_lock);
2841 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2843 if (!kvm_s390_use_sca_entries()) {
2844 struct bsca_block *sca = vcpu->kvm->arch.sca;
2846 /* we still need the basic sca for the ipte control */
2847 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2848 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2851 read_lock(&vcpu->kvm->arch.sca_lock);
2852 if (vcpu->kvm->arch.use_esca) {
2853 struct esca_block *sca = vcpu->kvm->arch.sca;
2855 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2856 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2857 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2858 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2859 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2861 struct bsca_block *sca = vcpu->kvm->arch.sca;
2863 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2864 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2865 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2866 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2868 read_unlock(&vcpu->kvm->arch.sca_lock);
2871 /* Basic SCA to Extended SCA data copy routines */
2872 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2875 d->sigp_ctrl.c = s->sigp_ctrl.c;
2876 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2879 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2883 d->ipte_control = s->ipte_control;
2885 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2886 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2889 static int sca_switch_to_extended(struct kvm *kvm)
2891 struct bsca_block *old_sca = kvm->arch.sca;
2892 struct esca_block *new_sca;
2893 struct kvm_vcpu *vcpu;
2894 unsigned int vcpu_idx;
2897 if (kvm->arch.use_esca)
2900 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2904 scaoh = (u32)((u64)(new_sca) >> 32);
2905 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2907 kvm_s390_vcpu_block_all(kvm);
2908 write_lock(&kvm->arch.sca_lock);
2910 sca_copy_b_to_e(new_sca, old_sca);
2912 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2913 vcpu->arch.sie_block->scaoh = scaoh;
2914 vcpu->arch.sie_block->scaol = scaol;
2915 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2917 kvm->arch.sca = new_sca;
2918 kvm->arch.use_esca = 1;
2920 write_unlock(&kvm->arch.sca_lock);
2921 kvm_s390_vcpu_unblock_all(kvm);
2923 free_page((unsigned long)old_sca);
2925 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2926 old_sca, kvm->arch.sca);
2928 return 0;
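/*
 * sca_can_add_vcpu() - check whether vcpu @id fits into the SCA,
 * upgrading a basic SCA to the extended format on demand when the id
 * does not fit and the machine offers ESCA and the 64-bit SCA origin.
 */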
2930 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2934 if (!kvm_s390_use_sca_entries()) {
2935 if (id < KVM_MAX_VCPUS)
2939 if (id < KVM_S390_BSCA_CPU_SLOTS)
2941 if (!sclp.has_esca || !sclp.has_64bscao)
2944 mutex_lock(&kvm->lock);
2945 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2946 mutex_unlock(&kvm->lock);
2948 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2951 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2952 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2954 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2955 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2956 vcpu->arch.cputm_start = get_tod_clock_fast();
2957 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2960 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2961 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2963 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2964 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2965 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2966 vcpu->arch.cputm_start = 0;
2967 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2970 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2971 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2973 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2974 vcpu->arch.cputm_enabled = true;
2975 __start_cpu_timer_accounting(vcpu);
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2979 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2981 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2982 __stop_cpu_timer_accounting(vcpu);
2983 vcpu->arch.cputm_enabled = false;
2986 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2988 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2989 __enable_cpu_timer_accounting(vcpu);
2990 preempt_enable();
2993 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2995 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2996 __disable_cpu_timer_accounting(vcpu);
2997 preempt_enable();
3000 /* set the cpu timer - may only be called from the VCPU thread itself */
3001 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3003 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3004 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3005 if (vcpu->arch.cputm_enabled)
3006 vcpu->arch.cputm_start = get_tod_clock_fast();
3007 vcpu->arch.sie_block->cputm = cputm;
3008 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3009 preempt_enable();
3012 /* update and get the cpu timer - can also be called from other VCPU threads */
3013 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3018 if (unlikely(!vcpu->arch.cputm_enabled))
3019 return vcpu->arch.sie_block->cputm;
3021 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3023 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3025 * If the writer would ever execute a read in the critical
3026 * section, e.g. in irq context, we have a deadlock.
3028 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3029 value = vcpu->arch.sie_block->cputm;
3030 /* if cputm_start is 0, accounting is being started/stopped */
3031 if (likely(vcpu->arch.cputm_start))
3032 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3033 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3034 preempt_enable();
3035 return value;
3038 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3041 gmap_enable(vcpu->arch.enabled_gmap);
3042 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3043 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3044 __start_cpu_timer_accounting(vcpu);
3048 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3051 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3052 __stop_cpu_timer_accounting(vcpu);
3053 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3054 vcpu->arch.enabled_gmap = gmap_get_enabled();
3055 gmap_disable(vcpu->arch.enabled_gmap);
3059 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3061 mutex_lock(&vcpu->kvm->lock);
3063 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3064 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3066 mutex_unlock(&vcpu->kvm->lock);
3067 if (!kvm_is_ucontrol(vcpu->kvm)) {
3068 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3071 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3072 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3073 /* make vcpu_load load the right gmap on the first trigger */
3074 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3077 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3079 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3080 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3085 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3087 /* At least one ECC subfunction must be present */
3088 return kvm_has_pckmo_subfunc(kvm, 32) ||
3089 kvm_has_pckmo_subfunc(kvm, 33) ||
3090 kvm_has_pckmo_subfunc(kvm, 34) ||
3091 kvm_has_pckmo_subfunc(kvm, 40) ||
3092 kvm_has_pckmo_subfunc(kvm, 41);
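/*
 * Note: the subfunction numbers tested above are the PCKMO function
 * codes for the ECC key types (presumably ECC-P256/P384/P521 and
 * Ed25519/Ed448); the exact mapping is an assumption of this comment.
 */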
3096 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3099 * If the AP instructions are not being interpreted and the MSAX3
3100 * facility is not configured for the guest, there is nothing to set up.
3102 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3105 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3106 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3107 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3108 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3110 if (vcpu->kvm->arch.crypto.apie)
3111 vcpu->arch.sie_block->eca |= ECA_APIE;
3113 /* Set up protected key support */
3114 if (vcpu->kvm->arch.crypto.aes_kw) {
3115 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3116 /* ecc is also wrapped with AES key */
3117 if (kvm_has_pckmo_ecc(vcpu->kvm))
3118 vcpu->arch.sie_block->ecd |= ECD_ECC;
3121 if (vcpu->kvm->arch.crypto.dea_kw)
3122 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3125 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3127 free_page(vcpu->arch.sie_block->cbrlo);
3128 vcpu->arch.sie_block->cbrlo = 0;
3131 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3133 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3134 if (!vcpu->arch.sie_block->cbrlo)
3139 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3141 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3143 vcpu->arch.sie_block->ibc = model->ibc;
3144 if (test_kvm_facility(vcpu->kvm, 7))
3145 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3148 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3153 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3157 if (test_kvm_facility(vcpu->kvm, 78))
3158 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3159 else if (test_kvm_facility(vcpu->kvm, 8))
3160 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3162 kvm_s390_vcpu_setup_model(vcpu);
3164 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3165 if (MACHINE_HAS_ESOP)
3166 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3167 if (test_kvm_facility(vcpu->kvm, 9))
3168 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3169 if (test_kvm_facility(vcpu->kvm, 73))
3170 vcpu->arch.sie_block->ecb |= ECB_TE;
3172 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3173 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3174 if (test_kvm_facility(vcpu->kvm, 130))
3175 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3176 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3178 vcpu->arch.sie_block->eca |= ECA_CEI;
3180 vcpu->arch.sie_block->eca |= ECA_IB;
3182 vcpu->arch.sie_block->eca |= ECA_SII;
3183 if (sclp.has_sigpif)
3184 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3185 if (test_kvm_facility(vcpu->kvm, 129)) {
3186 vcpu->arch.sie_block->eca |= ECA_VX;
3187 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3189 if (test_kvm_facility(vcpu->kvm, 139))
3190 vcpu->arch.sie_block->ecd |= ECD_MEF;
3191 if (test_kvm_facility(vcpu->kvm, 156))
3192 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3193 if (vcpu->arch.sie_block->gd) {
3194 vcpu->arch.sie_block->eca |= ECA_AIV;
3195 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3196 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3198 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3200 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3203 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3205 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3207 if (vcpu->kvm->arch.use_cmma) {
3208 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3212 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3213 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3215 vcpu->arch.sie_block->hpid = HPID_KVM;
3217 kvm_s390_vcpu_crypto_setup(vcpu);
3219 mutex_lock(&vcpu->kvm->lock);
3220 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3221 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3223 kvm_s390_vcpu_unsetup_cmma(vcpu);
3225 mutex_unlock(&vcpu->kvm->lock);
3230 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3232 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3237 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3239 struct sie_page *sie_page;
3242 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3243 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3247 vcpu->arch.sie_block = &sie_page->sie_block;
3248 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3250 /* the real guest size will always be smaller than msl */
3251 vcpu->arch.sie_block->mso = 0;
3252 vcpu->arch.sie_block->msl = sclp.hamax;
3254 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3255 spin_lock_init(&vcpu->arch.local_int.lock);
3256 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3257 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3258 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3259 seqcount_init(&vcpu->arch.cputm_seqcount);
3261 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3262 kvm_clear_async_pf_completion_queue(vcpu);
3263 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3269 kvm_s390_set_prefix(vcpu, 0);
3270 if (test_kvm_facility(vcpu->kvm, 64))
3271 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3272 if (test_kvm_facility(vcpu->kvm, 82))
3273 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3274 if (test_kvm_facility(vcpu->kvm, 133))
3275 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3276 if (test_kvm_facility(vcpu->kvm, 156))
3277 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3278 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3279 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3282 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3284 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3286 if (kvm_is_ucontrol(vcpu->kvm)) {
3287 rc = __kvm_ucontrol_vcpu_init(vcpu);
3289 goto out_free_sie_block;
3292 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3293 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3294 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3296 rc = kvm_s390_vcpu_setup(vcpu);
3298 goto out_ucontrol_uninit;
3301 out_ucontrol_uninit:
3302 if (kvm_is_ucontrol(vcpu->kvm))
3303 gmap_remove(vcpu->arch.gmap);
3305 free_page((unsigned long)(vcpu->arch.sie_block));
3309 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3311 return kvm_s390_vcpu_has_irq(vcpu, 0);
3314 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3316 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3319 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3321 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3325 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3327 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3330 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3332 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3336 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3338 return atomic_read(&vcpu->arch.sie_block->prog20) &
3339 (PROG_BLOCK_SIE | PROG_REQUEST);
3342 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3344 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3348 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3349 * If the CPU is not running (e.g. waiting as idle) the function will
3350 * return immediately. */
3351 void exit_sie(struct kvm_vcpu *vcpu)
3353 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3354 kvm_s390_vsie_kick(vcpu);
3355 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3359 /* Kick a guest cpu out of SIE to process a request synchronously */
3360 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3362 kvm_make_request(req, vcpu);
3363 kvm_s390_vcpu_request(vcpu);
3366 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3369 struct kvm *kvm = gmap->private;
3370 struct kvm_vcpu *vcpu;
3371 unsigned long prefix;
3374 if (gmap_is_shadow(gmap))
3376 if (start >= 1UL << 31)
3377 /* We are only interested in prefix pages */
3379 kvm_for_each_vcpu(i, vcpu, kvm) {
3380 /* match against both prefix pages */
3381 prefix = kvm_s390_get_prefix(vcpu);
3382 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3383 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3385 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3390 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3392 /* do not poll with more than halt_poll_max_steal percent of steal time */
3393 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3394 halt_poll_max_steal) {
3395 vcpu->stat.halt_no_poll_steal++;
3401 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3403 /* kvm common code refers to this, but never calls it */
3408 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3409 struct kvm_one_reg *reg)
3414 case KVM_REG_S390_TODPR:
3415 r = put_user(vcpu->arch.sie_block->todpr,
3416 (u32 __user *)reg->addr);
3418 case KVM_REG_S390_EPOCHDIFF:
3419 r = put_user(vcpu->arch.sie_block->epoch,
3420 (u64 __user *)reg->addr);
3422 case KVM_REG_S390_CPU_TIMER:
3423 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3424 (u64 __user *)reg->addr);
3426 case KVM_REG_S390_CLOCK_COMP:
3427 r = put_user(vcpu->arch.sie_block->ckc,
3428 (u64 __user *)reg->addr);
3430 case KVM_REG_S390_PFTOKEN:
3431 r = put_user(vcpu->arch.pfault_token,
3432 (u64 __user *)reg->addr);
3434 case KVM_REG_S390_PFCOMPARE:
3435 r = put_user(vcpu->arch.pfault_compare,
3436 (u64 __user *)reg->addr);
3438 case KVM_REG_S390_PFSELECT:
3439 r = put_user(vcpu->arch.pfault_select,
3440 (u64 __user *)reg->addr);
3442 case KVM_REG_S390_PP:
3443 r = put_user(vcpu->arch.sie_block->pp,
3444 (u64 __user *)reg->addr);
3446 case KVM_REG_S390_GBEA:
3447 r = put_user(vcpu->arch.sie_block->gbea,
3448 (u64 __user *)reg->addr);
3457 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3458 struct kvm_one_reg *reg)
3464 case KVM_REG_S390_TODPR:
3465 r = get_user(vcpu->arch.sie_block->todpr,
3466 (u32 __user *)reg->addr);
3468 case KVM_REG_S390_EPOCHDIFF:
3469 r = get_user(vcpu->arch.sie_block->epoch,
3470 (u64 __user *)reg->addr);
3472 case KVM_REG_S390_CPU_TIMER:
3473 r = get_user(val, (u64 __user *)reg->addr);
3475 kvm_s390_set_cpu_timer(vcpu, val);
3477 case KVM_REG_S390_CLOCK_COMP:
3478 r = get_user(vcpu->arch.sie_block->ckc,
3479 (u64 __user *)reg->addr);
3481 case KVM_REG_S390_PFTOKEN:
3482 r = get_user(vcpu->arch.pfault_token,
3483 (u64 __user *)reg->addr);
3484 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3485 kvm_clear_async_pf_completion_queue(vcpu);
3487 case KVM_REG_S390_PFCOMPARE:
3488 r = get_user(vcpu->arch.pfault_compare,
3489 (u64 __user *)reg->addr);
3491 case KVM_REG_S390_PFSELECT:
3492 r = get_user(vcpu->arch.pfault_select,
3493 (u64 __user *)reg->addr);
3495 case KVM_REG_S390_PP:
3496 r = get_user(vcpu->arch.sie_block->pp,
3497 (u64 __user *)reg->addr);
3499 case KVM_REG_S390_GBEA:
3500 r = get_user(vcpu->arch.sie_block->gbea,
3501 (u64 __user *)reg->addr);
3510 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3512 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3513 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3514 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3516 kvm_clear_async_pf_completion_queue(vcpu);
3517 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3518 kvm_s390_vcpu_stop(vcpu);
3519 kvm_s390_clear_local_irqs(vcpu);
3522 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3524 /* Initial reset is a superset of the normal reset */
3525 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3528 * This equals initial cpu reset in pop, but we don't switch to ESA.
3529 * We do not only reset the internal data, but also ...
3531 vcpu->arch.sie_block->gpsw.mask = 0;
3532 vcpu->arch.sie_block->gpsw.addr = 0;
3533 kvm_s390_set_prefix(vcpu, 0);
3534 kvm_s390_set_cpu_timer(vcpu, 0);
3535 vcpu->arch.sie_block->ckc = 0;
3536 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3537 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3538 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3540 /* ... the data in sync regs */
3541 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3542 vcpu->run->s.regs.ckc = 0;
3543 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3544 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3545 vcpu->run->psw_addr = 0;
3546 vcpu->run->psw_mask = 0;
3547 vcpu->run->s.regs.todpr = 0;
3548 vcpu->run->s.regs.cputm = 0;
3549 vcpu->run->s.regs.ckc = 0;
3550 vcpu->run->s.regs.pp = 0;
3551 vcpu->run->s.regs.gbea = 1;
3552 vcpu->run->s.regs.fpc = 0;
3554 * Do not reset these registers in the protected case, as some of
3555 * them are overlayed and they are not accessible in this case
3558 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3559 vcpu->arch.sie_block->gbea = 1;
3560 vcpu->arch.sie_block->pp = 0;
3561 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3562 vcpu->arch.sie_block->todpr = 0;
3566 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3568 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3570 /* Clear reset is a superset of the initial reset */
3571 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3573 memset(&regs->gprs, 0, sizeof(regs->gprs));
3574 memset(&regs->vrs, 0, sizeof(regs->vrs));
3575 memset(&regs->acrs, 0, sizeof(regs->acrs));
3576 memset(&regs->gscb, 0, sizeof(regs->gscb));
3579 regs->etoken_extension = 0;
3582 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3585 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3590 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3593 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3598 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3599 struct kvm_sregs *sregs)
3603 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3604 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3610 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3611 struct kvm_sregs *sregs)
3615 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3616 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3622 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3628 if (test_fp_ctl(fpu->fpc)) {
3632 vcpu->run->s.regs.fpc = fpu->fpc;
3634 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3635 (freg_t *) fpu->fprs);
3637 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3644 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3648 /* make sure we have the latest values */
3651 convert_vx_to_fp((freg_t *) fpu->fprs,
3652 (__vector128 *) vcpu->run->s.regs.vrs);
3654 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3655 fpu->fpc = vcpu->run->s.regs.fpc;
3661 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3665 if (!is_vcpu_stopped(vcpu))
3668 vcpu->run->psw_mask = psw.mask;
3669 vcpu->run->psw_addr = psw.addr;
3674 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3675 struct kvm_translation *tr)
3677 return -EINVAL; /* not implemented yet */
3680 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3681 KVM_GUESTDBG_USE_HW_BP | \
3682 KVM_GUESTDBG_ENABLE)
3684 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3685 struct kvm_guest_debug *dbg)
3691 vcpu->guest_debug = 0;
3692 kvm_s390_clear_bp_data(vcpu);
3694 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3698 if (!sclp.has_gpere) {
3703 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3704 vcpu->guest_debug = dbg->control;
3705 /* enforce guest PER */
3706 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3708 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3709 rc = kvm_s390_import_bp_data(vcpu, dbg);
3711 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3712 vcpu->arch.guestdbg.last_bp = 0;
3716 vcpu->guest_debug = 0;
3717 kvm_s390_clear_bp_data(vcpu);
3718 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3726 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3727 struct kvm_mp_state *mp_state)
3733 /* CHECK_STOP and LOAD are not supported yet */
3734 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3735 KVM_MP_STATE_OPERATING;
3741 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3742 struct kvm_mp_state *mp_state)
3748 /* user space knows about this interface - let it control the state */
3749 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3751 switch (mp_state->mp_state) {
3752 case KVM_MP_STATE_STOPPED:
3753 rc = kvm_s390_vcpu_stop(vcpu);
3755 case KVM_MP_STATE_OPERATING:
3756 rc = kvm_s390_vcpu_start(vcpu);
3758 case KVM_MP_STATE_LOAD:
3759 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3763 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3765 case KVM_MP_STATE_CHECK_STOP:
3766 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3767 default:
3768 rc = -ENXIO;
3769 }
3771 vcpu_put(vcpu);
3772 return rc;
3775 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3777 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3780 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3783 kvm_s390_vcpu_request_handled(vcpu);
3784 if (!kvm_request_pending(vcpu))
3787 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3788 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3789 * This ensures that the ipte instruction for this request has
3790 * already finished. We might race against a second unmapper that
3791 * wants to set the blocking bit. Lets just retry the request loop.
3793 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3795 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3796 kvm_s390_get_prefix(vcpu),
3797 PAGE_SIZE * 2, PROT_WRITE);
3799 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3805 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3806 vcpu->arch.sie_block->ihcpu = 0xffff;
3810 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3811 if (!ibs_enabled(vcpu)) {
3812 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3813 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3818 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3819 if (ibs_enabled(vcpu)) {
3820 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3821 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3826 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3827 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3831 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3833 * Disable CMM virtualization; we will emulate the ESSA
3834 * instruction manually, in order to provide additional
3835 * functionalities needed for live migration.
3837 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3841 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3843 * Re-enable CMM virtualization if CMMA is available and
3844 * CMM has been used.
3846 if ((vcpu->kvm->arch.use_cmma) &&
3847 (vcpu->kvm->mm->context.uses_cmm))
3848 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3852 /* nothing to do, just clear the request */
3853 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3854 /* we left the vsie handler, nothing to do, just clear the request */
3855 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3860 void kvm_s390_set_tod_clock(struct kvm *kvm,
3861 const struct kvm_s390_vm_tod_clock *gtod)
3863 struct kvm_vcpu *vcpu;
3864 struct kvm_s390_tod_clock_ext htod;
3867 mutex_lock(&kvm->lock);
3870 get_tod_clock_ext((char *)&htod);
3872 kvm->arch.epoch = gtod->tod - htod.tod;
3874 if (test_kvm_facility(kvm, 139)) {
3875 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3876 if (kvm->arch.epoch > gtod->tod)
3877 kvm->arch.epdx -= 1;
3880 kvm_s390_vcpu_block_all(kvm);
3881 kvm_for_each_vcpu(i, vcpu, kvm) {
3882 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3883 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3886 kvm_s390_vcpu_unblock_all(kvm);
3888 mutex_unlock(&kvm->lock);
3892 * kvm_arch_fault_in_page - fault-in guest page if necessary
3893 * @vcpu: The corresponding virtual cpu
3894 * @gpa: Guest physical address
3895 * @writable: Whether the page should be writable or not
3897 * Make sure that a guest page has been faulted-in on the host.
3899 * Return: Zero on success, negative error code otherwise.
3901 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3903 return gmap_fault(vcpu->arch.gmap, gpa,
3904 writable ? FAULT_FLAG_WRITE : 0);
3907 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3908 unsigned long token)
3910 struct kvm_s390_interrupt inti;
3911 struct kvm_s390_irq irq;
3914 irq.u.ext.ext_params2 = token;
3915 irq.type = KVM_S390_INT_PFAULT_INIT;
3916 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3918 inti.type = KVM_S390_INT_PFAULT_DONE;
3919 inti.parm64 = token;
3920 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3924 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3925 struct kvm_async_pf *work)
3927 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3928 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3931 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3932 struct kvm_async_pf *work)
3934 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3935 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3938 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3939 struct kvm_async_pf *work)
3941 /* s390 will always inject the page directly */
3944 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3947 * s390 will always inject the page directly,
3948 * but we still want check_async_completion to cleanup
3953 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3956 struct kvm_arch_async_pf arch;
3959 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3961 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3962 vcpu->arch.pfault_compare)
3964 if (psw_extint_disabled(vcpu))
3966 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3968 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3970 if (!vcpu->arch.gmap->pfault_enabled)
3973 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3974 hva += current->thread.gmap_addr & ~PAGE_MASK;
3975 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3978 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3979 return rc;
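/*
 * vcpu_pre_run() - everything that must happen before (re)entering SIE:
 * async-pf completion housekeeping, delivery of pending interrupts,
 * request processing and guest-debug PER setup. A nonzero return value
 * leaves the run loop.
 */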
3982 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3987 * On s390, notifications for arriving pages will be delivered directly
3988 * to the guest, but the housekeeping for completed pfaults is
3989 * handled outside the worker.
3991 kvm_check_async_pf_completion(vcpu);
3993 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3994 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3999 if (test_cpu_flag(CIF_MCCK_PENDING))
4002 if (!kvm_is_ucontrol(vcpu->kvm)) {
4003 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4008 rc = kvm_s390_handle_requests(vcpu);
4012 if (guestdbg_enabled(vcpu)) {
4013 kvm_s390_backup_guest_per_regs(vcpu);
4014 kvm_s390_patch_guest_per_regs(vcpu);
4017 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4019 vcpu->arch.sie_block->icptcode = 0;
4020 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4021 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4022 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4027 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4029 struct kvm_s390_pgm_info pgm_info = {
4030 .code = PGM_ADDRESSING,
4035 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4036 trace_kvm_s390_sie_fault(vcpu);
4039 * We want to inject an addressing exception, which is defined as a
4040 * suppressing or terminating exception. However, since we came here
4041 * by a DAT access exception, the PSW still points to the faulting
4042 * instruction since DAT exceptions are nullifying. So we've got
4043 * to look up the current opcode to get the length of the instruction
4044 * to be able to forward the PSW.
4046 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4047 ilen = insn_length(opcode);
4051 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4052 * Forward by arbitrary ilc, injection will take care of
4053 * nullification if necessary.
4055 pgm_info = vcpu->arch.pgm;
4058 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4059 kvm_s390_forward_psw(vcpu, ilen);
4060 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
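/*
 * vcpu_post_run() - turn the raw SIE exit into a verdict: reinject
 * machine checks (-EINTR), let the intercept handlers run, map ucontrol
 * and guest page faults, and encode anything that must reach userspace
 * in vcpu->run->exit_reason.
 */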
4063 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4065 struct mcck_volatile_info *mcck_info;
4066 struct sie_page *sie_page;
4068 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4069 vcpu->arch.sie_block->icptcode);
4070 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4072 if (guestdbg_enabled(vcpu))
4073 kvm_s390_restore_guest_per_regs(vcpu);
4075 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4076 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4078 if (exit_reason == -EINTR) {
4079 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4080 sie_page = container_of(vcpu->arch.sie_block,
4081 struct sie_page, sie_block);
4082 mcck_info = &sie_page->mcck_info;
4083 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4087 if (vcpu->arch.sie_block->icptcode > 0) {
4088 int rc = kvm_handle_sie_intercept(vcpu);
4090 if (rc != -EOPNOTSUPP)
4092 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4093 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4094 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4095 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4097 } else if (exit_reason != -EFAULT) {
4098 vcpu->stat.exit_null++;
4100 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4101 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4102 vcpu->run->s390_ucontrol.trans_exc_code =
4103 current->thread.gmap_addr;
4104 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4106 } else if (current->thread.gmap_pfault) {
4107 trace_kvm_s390_major_guest_pfault(vcpu);
4108 current->thread.gmap_pfault = 0;
4109 if (kvm_arch_setup_async_pf(vcpu))
4111 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4113 return vcpu_post_run_fault_in_sie(vcpu);
4116 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4117 static int __vcpu_run(struct kvm_vcpu *vcpu)
4119 int rc, exit_reason;
4120 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4123 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4124 * ning the guest), so that memslots (and other stuff) are protected
4126 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4129 rc = vcpu_pre_run(vcpu);
4130 if (rc)
4131 break;
4133 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4135 * As PF_VCPU will be used in the fault handler, there must be
4136 * no uaccess between guest_enter and guest_exit.
4138 local_irq_disable();
4139 guest_enter_irqoff();
4140 __disable_cpu_timer_accounting(vcpu);
4142 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4143 memcpy(sie_page->pv_grregs,
4144 vcpu->run->s.regs.gprs,
4145 sizeof(sie_page->pv_grregs));
4147 exit_reason = sie64a(vcpu->arch.sie_block,
4148 vcpu->run->s.regs.gprs);
4149 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4150 memcpy(vcpu->run->s.regs.gprs,
4151 sie_page->pv_grregs,
4152 sizeof(sie_page->pv_grregs));
4154 * We're not allowed to inject interrupts on intercepts
4155 * that leave the guest state in an "in-between" state
4156 * where the next SIE entry will do a continuation.
4157 * Fence interrupts in our "internal" PSW.
4159 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4160 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4161 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4164 local_irq_disable();
4165 __enable_cpu_timer_accounting(vcpu);
4166 guest_exit_irqoff();
4168 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4170 rc = vcpu_post_run(vcpu, exit_reason);
4171 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4173 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4174 return rc;
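/*
 * sync_regs_fmt2() - load the register state that only exists for
 * non-protected ("format 2") guests from kvm_run into the SIE block,
 * including lazy enablement of runtime instrumentation and guarded
 * storage when userspace hands in non-default state (e.g. after
 * migration).
 */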
4177 static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4179 struct runtime_instr_cb *riccb;
4182 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4183 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4184 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4185 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4186 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4187 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4188 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4189 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4191 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4192 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4193 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4194 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4195 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4196 kvm_clear_async_pf_completion_queue(vcpu);
4199 * If userspace sets the riccb (e.g. after migration) to a valid state,
4200 * we should enable RI here instead of doing the lazy enablement.
4202 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4203 test_kvm_facility(vcpu->kvm, 64) &&
4205 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4206 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4207 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4210 * If userspace sets the gscb (e.g. after migration) to non-zero,
4211 * we should enable GS here instead of doing the lazy enablement.
4213 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4214 test_kvm_facility(vcpu->kvm, 133) &&
4216 !vcpu->arch.gs_enabled) {
4217 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4218 vcpu->arch.sie_block->ecb |= ECB_GS;
4219 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4220 vcpu->arch.gs_enabled = 1;
4222 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4223 test_kvm_facility(vcpu->kvm, 82)) {
4224 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4225 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4227 if (MACHINE_HAS_GS) {
4228 preempt_disable();
4229 __ctl_set_bit(2, 4);
4230 if (current->thread.gs_cb) {
4231 vcpu->arch.host_gscb = current->thread.gs_cb;
4232 save_gs_cb(vcpu->arch.host_gscb);
4234 if (vcpu->arch.gs_enabled) {
4235 current->thread.gs_cb = (struct gs_cb *)
4236 &vcpu->run->s.regs.gscb;
4237 restore_gs_cb(current->thread.gs_cb);
4238 }
4239 preempt_enable();
4240 }
4241 /* SIE will load etoken directly from SDNX and therefore kvm_run */
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu, kvm_run);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * do only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}
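
/* Mirror of sync_regs_fmt2(): copy format-2 only state back into kvm_run. */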
static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu, kvm_run);
}
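
/*
 * The KVM_RUN backend: sync registers in, run the guest via __vcpu_run()
 * until an exit requires userspace, then store registers back into kvm_run.
 */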
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);
	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}
	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}
	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);
	kvm_sigset_deactivate(vcpu);
	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
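
/*
 * IBS is a performance facility that is only worth enabling while exactly
 * one VCPU of the VM is running. The helpers below queue requests so that
 * the flag is always changed on the target VCPU itself.
 */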
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
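
/*
 * The only vcpu capability handled here is KVM_CAP_S390_CSS_SUPPORT, which
 * flips a VM-wide flag even though it arrives on a vcpu file descriptor.
 */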
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
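
/*
 * Access the secure instruction data area (SIDA) of a protected guest.
 * Plain logical reads/writes are not possible for protected guests, so the
 * SIDA is the only vcpu storage KVM_S390_MEM_OP can access in that case
 * (see the dispatch in kvm_s390_guest_memsida_op() below).
 */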
static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
				   struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	/* reject a sida_offset + size combination that would wrap around */
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
				 mop->sida_offset), mop->size))
			r = -EFAULT;

		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
				   mop->sida_offset), uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}
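
/*
 * Handle KVM_S390_MEM_OP accesses to logical (virtual) guest addresses.
 * Data is staged through a kernel bounce buffer (tmpbuf), since the guest
 * access helpers operate on kernel memory. As an illustrative, non-normative
 * sketch (field names from <linux/kvm.h>; "vcpu_fd", "guest_addr" and
 * "buffer" are placeholders), userspace would read 256 bytes from a guest
 * logical address roughly like this:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */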
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r = 0;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	}

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
				      struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_guest_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_guest_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}
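
/*
 * Interrupt injection ioctls are handled on the "async" path, i.e. without
 * taking the vcpu mutex, so userspace can inject an interrupt into a vcpu
 * that is currently running.
 */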
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
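
/*
 * All remaining vcpu ioctls are synchronous: the generic kvm_vcpu_ioctl()
 * dispatcher has already taken the vcpu mutex, and vcpu_load() below makes
 * the vcpu state resident on this CPU.
 */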
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
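
/*
 * For user-controlled ("ucontrol") VMs, userspace may mmap() the SIE
 * control block at KVM_S390_SIE_PAGE_OFFSET; any other vcpu mmap access
 * raises SIGBUS.
 */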
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment (1MB) boundary. The backing memory in userspace may be
	   fragmented into various different vmas, and it is fine to mmap()
	   and munmap() within this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;
	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
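
/*
 * Derive which facility bits may be forwarded to guests: each 2-bit field
 * of the SCLP hmfai value selects how much of the corresponding 64-bit
 * facility-list word stays visible; the result is ANDed into
 * kvm_s390_fac_base in kvm_s390_init() below.
 */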
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");