1 // SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
48 #include <asm/fpu/api.h>
52 #define CREATE_TRACE_POINTS
54 #include "trace-s390.h"
/* Maximum transfer size for KVM_S390_MEM_OP */
#define MEM_OP_MAX_SIZE 65536

/* Worst-case buffer size for KVM_S390_{GET,SET}_IRQ_STATE */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
61 struct kvm_stats_debugfs_item debugfs_entries[] = {
62 VCPU_STAT("userspace_handled", exit_userspace),
63 VCPU_STAT("exit_null", exit_null),
64 VCPU_STAT("pfault_sync", pfault_sync),
65 VCPU_STAT("exit_validity", exit_validity),
66 VCPU_STAT("exit_stop_request", exit_stop_request),
67 VCPU_STAT("exit_external_request", exit_external_request),
68 VCPU_STAT("exit_io_request", exit_io_request),
69 VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
70 VCPU_STAT("exit_instruction", exit_instruction),
71 VCPU_STAT("exit_pei", exit_pei),
72 VCPU_STAT("exit_program_interruption", exit_program_interruption),
73 VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
74 VCPU_STAT("exit_operation_exception", exit_operation_exception),
75 VCPU_STAT("halt_successful_poll", halt_successful_poll),
76 VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
77 VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
78 VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
79 VCPU_STAT("halt_wakeup", halt_wakeup),
80 VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
81 VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
82 VCPU_STAT("instruction_lctlg", instruction_lctlg),
83 VCPU_STAT("instruction_lctl", instruction_lctl),
84 VCPU_STAT("instruction_stctl", instruction_stctl),
85 VCPU_STAT("instruction_stctg", instruction_stctg),
86 VCPU_STAT("deliver_ckc", deliver_ckc),
87 VCPU_STAT("deliver_cputm", deliver_cputm),
88 VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
89 VCPU_STAT("deliver_external_call", deliver_external_call),
90 VCPU_STAT("deliver_service_signal", deliver_service_signal),
91 VCPU_STAT("deliver_virtio", deliver_virtio),
92 VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
93 VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
94 VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
95 VCPU_STAT("deliver_program", deliver_program),
96 VCPU_STAT("deliver_io", deliver_io),
97 VCPU_STAT("deliver_machine_check", deliver_machine_check),
98 VCPU_STAT("exit_wait_state", exit_wait_state),
99 VCPU_STAT("inject_ckc", inject_ckc),
100 VCPU_STAT("inject_cputm", inject_cputm),
101 VCPU_STAT("inject_external_call", inject_external_call),
102 VM_STAT("inject_float_mchk", inject_float_mchk),
103 VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
104 VM_STAT("inject_io", inject_io),
105 VCPU_STAT("inject_mchk", inject_mchk),
106 VM_STAT("inject_pfault_done", inject_pfault_done),
107 VCPU_STAT("inject_program", inject_program),
108 VCPU_STAT("inject_restart", inject_restart),
109 VM_STAT("inject_service_signal", inject_service_signal),
110 VCPU_STAT("inject_set_prefix", inject_set_prefix),
111 VCPU_STAT("inject_stop_signal", inject_stop_signal),
112 VCPU_STAT("inject_pfault_init", inject_pfault_init),
113 VM_STAT("inject_virtio", inject_virtio),
114 VCPU_STAT("instruction_epsw", instruction_epsw),
115 VCPU_STAT("instruction_gs", instruction_gs),
116 VCPU_STAT("instruction_io_other", instruction_io_other),
117 VCPU_STAT("instruction_lpsw", instruction_lpsw),
118 VCPU_STAT("instruction_lpswe", instruction_lpswe),
119 VCPU_STAT("instruction_pfmf", instruction_pfmf),
120 VCPU_STAT("instruction_ptff", instruction_ptff),
121 VCPU_STAT("instruction_stidp", instruction_stidp),
122 VCPU_STAT("instruction_sck", instruction_sck),
123 VCPU_STAT("instruction_sckpf", instruction_sckpf),
124 VCPU_STAT("instruction_spx", instruction_spx),
125 VCPU_STAT("instruction_stpx", instruction_stpx),
126 VCPU_STAT("instruction_stap", instruction_stap),
127 VCPU_STAT("instruction_iske", instruction_iske),
128 VCPU_STAT("instruction_ri", instruction_ri),
129 VCPU_STAT("instruction_rrbe", instruction_rrbe),
130 VCPU_STAT("instruction_sske", instruction_sske),
131 VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
132 VCPU_STAT("instruction_essa", instruction_essa),
133 VCPU_STAT("instruction_stsi", instruction_stsi),
134 VCPU_STAT("instruction_stfl", instruction_stfl),
135 VCPU_STAT("instruction_tb", instruction_tb),
136 VCPU_STAT("instruction_tpi", instruction_tpi),
137 VCPU_STAT("instruction_tprot", instruction_tprot),
138 VCPU_STAT("instruction_tsch", instruction_tsch),
139 VCPU_STAT("instruction_sthyi", instruction_sthyi),
140 VCPU_STAT("instruction_sie", instruction_sie),
141 VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
142 VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
143 VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
144 VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
145 VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
146 VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
147 VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
148 VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
149 VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
150 VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
151 VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
152 VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
153 VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
154 VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
155 VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
156 VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
157 VCPU_STAT("instruction_diag_10", diagnose_10),
158 VCPU_STAT("instruction_diag_44", diagnose_44),
159 VCPU_STAT("instruction_diag_9c", diagnose_9c),
160 VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
161 VCPU_STAT("diag_9c_forward", diagnose_9c_forward),
162 VCPU_STAT("instruction_diag_258", diagnose_258),
163 VCPU_STAT("instruction_diag_308", diagnose_308),
164 VCPU_STAT("instruction_diag_500", diagnose_500),
165 VCPU_STAT("instruction_diag_other", diagnose_other),
169 /* allow nested virtualization in KVM (if enabled by user space) */
171 module_param(nested, int, S_IRUGO);
172 MODULE_PARM_DESC(nested, "Nested virtualization support");
174 /* allow 1m huge page guest backing, if !nested */
176 module_param(hpage, int, 0444);
177 MODULE_PARM_DESC(hpage, "1m huge page backing support");
179 /* maximum percentage of steal time for polling. >100 is treated like 100 */
180 static u8 halt_poll_max_steal = 10;
181 module_param(halt_poll_max_steal, byte, 0644);
182 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
184 /* if set to true, the GISA will be initialized and used if available */
185 static bool use_gisa = true;
186 module_param(use_gisa, bool, 0644);
187 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
189 /* maximum diag9c forwarding per second */
190 unsigned int diag9c_forwarding_hz;
191 module_param(diag9c_forwarding_hz, uint, 0644);
192 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
195 * For now we handle at most 16 double words as this is what the s390 base
196 * kernel handles and stores in the prefix page. If we ever need to go beyond
197 * this, this requires changes to code, but the external uapi can stay.
199 #define SIZE_INTERNAL 16
202 * Base feature mask that defines default mask for facilities. Consists of the
203 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
205 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
207 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
208 * and defines the facilities that can be enabled via a cpu model.
210 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
212 static unsigned long kvm_s390_fac_size(void)
214 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
215 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
216 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
217 sizeof(S390_lowcore.stfle_fac_list));
219 return SIZE_INTERNAL;
222 /* available cpu features supported by kvm */
223 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
224 /* available subfunctions indicated via query / "test bit" */
225 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
227 static struct gmap_notifier gmap_notifier;
228 static struct gmap_notifier vsie_gmap_notifier;
229 debug_info_t *kvm_s390_dbf;
230 debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
249 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
254 * The TOD jumps by delta, we have to compensate this by adding
255 * -delta to the epoch.
259 /* sign-extension - we're adding to signed values below */
264 if (scb->ecd & ECD_MEF) {
265 scb->epdx += delta_idx;
266 if (scb->epoch < delta)
272 * This callback is executed during stop_machine(). All CPUs are therefore
273 * temporarily stopped. In order not to change guest behavior, we have to
274 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
275 * so a CPU won't be stopped while calculating with the epoch.
277 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
281 struct kvm_vcpu *vcpu;
283 unsigned long long *delta = v;
285 list_for_each_entry(kvm, &vm_list, vm_list) {
286 kvm_for_each_vcpu(i, vcpu, kvm) {
287 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
289 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
290 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
292 if (vcpu->arch.cputm_enabled)
293 vcpu->arch.cputm_start += *delta;
294 if (vcpu->arch.vsie_block)
295 kvm_clock_sync_scb(vcpu->arch.vsie_block,
302 static struct notifier_block kvm_clock_notifier = {
303 .notifier_call = kvm_clock_sync,
306 int kvm_arch_hardware_setup(void *opaque)
308 gmap_notifier.notifier_call = kvm_gmap_notifier;
309 gmap_register_pte_notifier(&gmap_notifier);
310 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
311 gmap_register_pte_notifier(&vsie_gmap_notifier);
312 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
313 &kvm_clock_notifier);
317 void kvm_arch_hardware_unsetup(void)
319 gmap_unregister_pte_notifier(&gmap_notifier);
320 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
321 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
322 &kvm_clock_notifier);
325 static void allow_cpu_feat(unsigned long nr)
327 set_bit_inv(nr, kvm_s390_available_cpu_feat);
330 static inline int plo_test_bit(unsigned char nr)
332 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
336 /* Parameter registers are ignored for "test bit" */
346 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
348 register unsigned long r0 asm("0") = 0; /* query function */
349 register unsigned long r1 asm("1") = (unsigned long) query;
352 /* Parameter regs are ignored */
353 " .insn rrf,%[opc] << 16,2,4,6,0\n"
355 : "d" (r0), "a" (r1), [opc] "i" (opcode)
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
362 static void kvm_s390_cpu_feat_init(void)
366 for (i = 0; i < 256; ++i) {
368 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
371 if (test_facility(28)) /* TOD-clock steering */
372 ptff(kvm_s390_available_subfunc.ptff,
373 sizeof(kvm_s390_available_subfunc.ptff),
376 if (test_facility(17)) { /* MSA */
377 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
378 kvm_s390_available_subfunc.kmac);
379 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
380 kvm_s390_available_subfunc.kmc);
381 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
382 kvm_s390_available_subfunc.km);
383 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
384 kvm_s390_available_subfunc.kimd);
385 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
386 kvm_s390_available_subfunc.klmd);
388 if (test_facility(76)) /* MSA3 */
389 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
390 kvm_s390_available_subfunc.pckmo);
391 if (test_facility(77)) { /* MSA4 */
392 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
393 kvm_s390_available_subfunc.kmctr);
394 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
395 kvm_s390_available_subfunc.kmf);
396 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
397 kvm_s390_available_subfunc.kmo);
398 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
399 kvm_s390_available_subfunc.pcc);
401 if (test_facility(57)) /* MSA5 */
402 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
403 kvm_s390_available_subfunc.ppno);
405 if (test_facility(146)) /* MSA8 */
406 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
407 kvm_s390_available_subfunc.kma);
409 if (test_facility(155)) /* MSA9 */
410 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
411 kvm_s390_available_subfunc.kdsa);
413 if (test_facility(150)) /* SORTL */
414 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
416 if (test_facility(151)) /* DFLTCC */
417 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
419 if (MACHINE_HAS_ESOP)
420 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
422 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
423 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
425 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
426 !test_facility(3) || !nested)
428 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
429 if (sclp.has_64bscao)
430 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
432 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
434 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
438 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
440 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
442 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
444 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
446 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
447 * all skey handling functions read/set the skey from the PGSTE
448 * instead of the real storage key.
450 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
451 * pages being detected as preserved although they are resident.
453 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
454 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
456 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
457 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
458 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
460 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
461 * cannot easily shadow the SCA because of the ipte lock.
465 int kvm_arch_init(void *opaque)
469 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
473 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
474 if (!kvm_s390_dbf_uv)
477 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
478 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
481 kvm_s390_cpu_feat_init();
483 /* Register floating interrupt controller interface. */
484 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
486 pr_err("A FLIC registration call failed with rc=%d\n", rc);
490 rc = kvm_s390_gib_init(GAL_ISC);
501 void kvm_arch_exit(void)
503 kvm_s390_gib_destroy();
504 debug_unregister(kvm_s390_dbf);
505 debug_unregister(kvm_s390_dbf_uv);
508 /* Section: device related */
509 long kvm_arch_dev_ioctl(struct file *filp,
510 unsigned int ioctl, unsigned long arg)
512 if (ioctl == KVM_S390_ENABLE_SIE)
513 return s390_enable_sie();
517 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
522 case KVM_CAP_S390_PSW:
523 case KVM_CAP_S390_GMAP:
524 case KVM_CAP_SYNC_MMU:
525 #ifdef CONFIG_KVM_S390_UCONTROL
526 case KVM_CAP_S390_UCONTROL:
528 case KVM_CAP_ASYNC_PF:
529 case KVM_CAP_SYNC_REGS:
530 case KVM_CAP_ONE_REG:
531 case KVM_CAP_ENABLE_CAP:
532 case KVM_CAP_S390_CSS_SUPPORT:
533 case KVM_CAP_IOEVENTFD:
534 case KVM_CAP_DEVICE_CTRL:
535 case KVM_CAP_S390_IRQCHIP:
536 case KVM_CAP_VM_ATTRIBUTES:
537 case KVM_CAP_MP_STATE:
538 case KVM_CAP_IMMEDIATE_EXIT:
539 case KVM_CAP_S390_INJECT_IRQ:
540 case KVM_CAP_S390_USER_SIGP:
541 case KVM_CAP_S390_USER_STSI:
542 case KVM_CAP_S390_SKEYS:
543 case KVM_CAP_S390_IRQ_STATE:
544 case KVM_CAP_S390_USER_INSTR0:
545 case KVM_CAP_S390_CMMA_MIGRATION:
546 case KVM_CAP_S390_AIS:
547 case KVM_CAP_S390_AIS_MIGRATION:
548 case KVM_CAP_S390_VCPU_RESETS:
549 case KVM_CAP_SET_GUEST_DEBUG:
550 case KVM_CAP_S390_DIAG318:
553 case KVM_CAP_SET_GUEST_DEBUG2:
554 r = KVM_GUESTDBG_VALID_MASK;
556 case KVM_CAP_S390_HPAGE_1M:
558 if (hpage && !kvm_is_ucontrol(kvm))
561 case KVM_CAP_S390_MEM_OP:
564 case KVM_CAP_NR_VCPUS:
565 case KVM_CAP_MAX_VCPUS:
566 case KVM_CAP_MAX_VCPU_ID:
567 r = KVM_S390_BSCA_CPU_SLOTS;
568 if (!kvm_s390_use_sca_entries())
570 else if (sclp.has_esca && sclp.has_64bscao)
571 r = KVM_S390_ESCA_CPU_SLOTS;
573 case KVM_CAP_S390_COW:
574 r = MACHINE_HAS_ESOP;
576 case KVM_CAP_S390_VECTOR_REGISTERS:
579 case KVM_CAP_S390_RI:
580 r = test_facility(64);
582 case KVM_CAP_S390_GS:
583 r = test_facility(133);
585 case KVM_CAP_S390_BPB:
586 r = test_facility(82);
588 case KVM_CAP_S390_PROTECTED:
589 r = is_prot_virt_host();
597 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
600 gfn_t cur_gfn, last_gfn;
601 unsigned long gaddr, vmaddr;
602 struct gmap *gmap = kvm->arch.gmap;
603 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
605 /* Loop over all guest segments */
606 cur_gfn = memslot->base_gfn;
607 last_gfn = memslot->base_gfn + memslot->npages;
608 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
609 gaddr = gfn_to_gpa(cur_gfn);
610 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
611 if (kvm_is_error_hva(vmaddr))
614 bitmap_zero(bitmap, _PAGE_ENTRIES);
615 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
616 for (i = 0; i < _PAGE_ENTRIES; i++) {
617 if (test_bit(i, bitmap))
618 mark_page_dirty(kvm, cur_gfn + i);
621 if (fatal_signal_pending(current))
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
631 * Get (and clear) the dirty memory log for a memory slot.
633 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
634 struct kvm_dirty_log *log)
638 struct kvm_memory_slot *memslot;
641 if (kvm_is_ucontrol(kvm))
644 mutex_lock(&kvm->slots_lock);
647 if (log->slot >= KVM_USER_MEM_SLOTS)
650 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
654 /* Clear the dirty log */
656 n = kvm_dirty_bitmap_bytes(memslot);
657 memset(memslot->dirty_bitmap, 0, n);
661 mutex_unlock(&kvm->slots_lock);
665 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
668 struct kvm_vcpu *vcpu;
670 kvm_for_each_vcpu(i, vcpu, kvm) {
671 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
675 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
683 case KVM_CAP_S390_IRQCHIP:
684 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
685 kvm->arch.use_irqchip = 1;
688 case KVM_CAP_S390_USER_SIGP:
689 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
690 kvm->arch.user_sigp = 1;
693 case KVM_CAP_S390_VECTOR_REGISTERS:
694 mutex_lock(&kvm->lock);
695 if (kvm->created_vcpus) {
697 } else if (MACHINE_HAS_VX) {
698 set_kvm_facility(kvm->arch.model.fac_mask, 129);
699 set_kvm_facility(kvm->arch.model.fac_list, 129);
700 if (test_facility(134)) {
701 set_kvm_facility(kvm->arch.model.fac_mask, 134);
702 set_kvm_facility(kvm->arch.model.fac_list, 134);
704 if (test_facility(135)) {
705 set_kvm_facility(kvm->arch.model.fac_mask, 135);
706 set_kvm_facility(kvm->arch.model.fac_list, 135);
708 if (test_facility(148)) {
709 set_kvm_facility(kvm->arch.model.fac_mask, 148);
710 set_kvm_facility(kvm->arch.model.fac_list, 148);
712 if (test_facility(152)) {
713 set_kvm_facility(kvm->arch.model.fac_mask, 152);
714 set_kvm_facility(kvm->arch.model.fac_list, 152);
719 mutex_unlock(&kvm->lock);
720 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
721 r ? "(not available)" : "(success)");
723 case KVM_CAP_S390_RI:
725 mutex_lock(&kvm->lock);
726 if (kvm->created_vcpus) {
728 } else if (test_facility(64)) {
729 set_kvm_facility(kvm->arch.model.fac_mask, 64);
730 set_kvm_facility(kvm->arch.model.fac_list, 64);
733 mutex_unlock(&kvm->lock);
734 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
735 r ? "(not available)" : "(success)");
737 case KVM_CAP_S390_AIS:
738 mutex_lock(&kvm->lock);
739 if (kvm->created_vcpus) {
742 set_kvm_facility(kvm->arch.model.fac_mask, 72);
743 set_kvm_facility(kvm->arch.model.fac_list, 72);
746 mutex_unlock(&kvm->lock);
747 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
748 r ? "(not available)" : "(success)");
750 case KVM_CAP_S390_GS:
752 mutex_lock(&kvm->lock);
753 if (kvm->created_vcpus) {
755 } else if (test_facility(133)) {
756 set_kvm_facility(kvm->arch.model.fac_mask, 133);
757 set_kvm_facility(kvm->arch.model.fac_list, 133);
760 mutex_unlock(&kvm->lock);
761 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
762 r ? "(not available)" : "(success)");
764 case KVM_CAP_S390_HPAGE_1M:
765 mutex_lock(&kvm->lock);
766 if (kvm->created_vcpus)
768 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
772 mmap_write_lock(kvm->mm);
773 kvm->mm->context.allow_gmap_hpage_1m = 1;
774 mmap_write_unlock(kvm->mm);
776 * We might have to create fake 4k page
777 * tables. To avoid that the hardware works on
778 * stale PGSTEs, we emulate these instructions.
780 kvm->arch.use_skf = 0;
781 kvm->arch.use_pfmfi = 0;
783 mutex_unlock(&kvm->lock);
784 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
785 r ? "(not available)" : "(success)");
787 case KVM_CAP_S390_USER_STSI:
788 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
789 kvm->arch.user_stsi = 1;
792 case KVM_CAP_S390_USER_INSTR0:
793 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
794 kvm->arch.user_instr0 = 1;
795 icpt_operexc_on_all_vcpus(kvm);
805 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
809 switch (attr->attr) {
810 case KVM_S390_VM_MEM_LIMIT_SIZE:
812 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
813 kvm->arch.mem_limit);
814 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
824 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
828 switch (attr->attr) {
829 case KVM_S390_VM_MEM_ENABLE_CMMA:
834 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
835 mutex_lock(&kvm->lock);
836 if (kvm->created_vcpus)
838 else if (kvm->mm->context.allow_gmap_hpage_1m)
841 kvm->arch.use_cmma = 1;
842 /* Not compatible with cmma. */
843 kvm->arch.use_pfmfi = 0;
846 mutex_unlock(&kvm->lock);
848 case KVM_S390_VM_MEM_CLR_CMMA:
853 if (!kvm->arch.use_cmma)
856 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
857 mutex_lock(&kvm->lock);
858 idx = srcu_read_lock(&kvm->srcu);
859 s390_reset_cmma(kvm->arch.gmap->mm);
860 srcu_read_unlock(&kvm->srcu, idx);
861 mutex_unlock(&kvm->lock);
864 case KVM_S390_VM_MEM_LIMIT_SIZE: {
865 unsigned long new_limit;
867 if (kvm_is_ucontrol(kvm))
870 if (get_user(new_limit, (u64 __user *)attr->addr))
873 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
874 new_limit > kvm->arch.mem_limit)
880 /* gmap_create takes last usable address */
881 if (new_limit != KVM_S390_NO_MEM_LIMIT)
885 mutex_lock(&kvm->lock);
886 if (!kvm->created_vcpus) {
887 /* gmap_create will round the limit up */
888 struct gmap *new = gmap_create(current->mm, new_limit);
893 gmap_remove(kvm->arch.gmap);
895 kvm->arch.gmap = new;
899 mutex_unlock(&kvm->lock);
900 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
901 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
902 (void *) kvm->arch.gmap->asce);
912 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
914 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
916 struct kvm_vcpu *vcpu;
919 kvm_s390_vcpu_block_all(kvm);
921 kvm_for_each_vcpu(i, vcpu, kvm) {
922 kvm_s390_vcpu_crypto_setup(vcpu);
923 /* recreate the shadow crycb by leaving the VSIE handler */
924 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
927 kvm_s390_vcpu_unblock_all(kvm);
930 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
932 mutex_lock(&kvm->lock);
933 switch (attr->attr) {
934 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
935 if (!test_kvm_facility(kvm, 76)) {
936 mutex_unlock(&kvm->lock);
940 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
941 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
942 kvm->arch.crypto.aes_kw = 1;
943 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
945 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
946 if (!test_kvm_facility(kvm, 76)) {
947 mutex_unlock(&kvm->lock);
951 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
952 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
953 kvm->arch.crypto.dea_kw = 1;
954 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
956 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
957 if (!test_kvm_facility(kvm, 76)) {
958 mutex_unlock(&kvm->lock);
961 kvm->arch.crypto.aes_kw = 0;
962 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
963 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
966 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
967 if (!test_kvm_facility(kvm, 76)) {
968 mutex_unlock(&kvm->lock);
971 kvm->arch.crypto.dea_kw = 0;
972 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
973 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
974 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
976 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
977 if (!ap_instructions_available()) {
978 mutex_unlock(&kvm->lock);
981 kvm->arch.crypto.apie = 1;
983 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
984 if (!ap_instructions_available()) {
985 mutex_unlock(&kvm->lock);
988 kvm->arch.crypto.apie = 0;
991 mutex_unlock(&kvm->lock);
995 kvm_s390_vcpu_crypto_reset_all(kvm);
996 mutex_unlock(&kvm->lock);
1000 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1003 struct kvm_vcpu *vcpu;
1005 kvm_for_each_vcpu(cx, vcpu, kvm)
1006 kvm_s390_sync_request(req, vcpu);
1010 * Must be called with kvm->srcu held to avoid races on memslots, and with
1011 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1013 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1015 struct kvm_memory_slot *ms;
1016 struct kvm_memslots *slots;
1017 unsigned long ram_pages = 0;
1020 /* migration mode already enabled */
1021 if (kvm->arch.migration_mode)
1023 slots = kvm_memslots(kvm);
1024 if (!slots || !slots->used_slots)
1027 if (!kvm->arch.use_cmma) {
1028 kvm->arch.migration_mode = 1;
1031 /* mark all the pages in active slots as dirty */
1032 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1033 ms = slots->memslots + slotnr;
1034 if (!ms->dirty_bitmap)
1037 * The second half of the bitmap is only used on x86,
1038 * and would be wasted otherwise, so we put it to good
1039 * use here to keep track of the state of the storage
1042 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1043 ram_pages += ms->npages;
1045 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1046 kvm->arch.migration_mode = 1;
1047 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1052 * Must be called with kvm->slots_lock to avoid races with ourselves and
1053 * kvm_s390_vm_start_migration.
1055 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1057 /* migration mode already disabled */
1058 if (!kvm->arch.migration_mode)
1060 kvm->arch.migration_mode = 0;
1061 if (kvm->arch.use_cmma)
1062 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1066 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1067 struct kvm_device_attr *attr)
1071 mutex_lock(&kvm->slots_lock);
1072 switch (attr->attr) {
1073 case KVM_S390_VM_MIGRATION_START:
1074 res = kvm_s390_vm_start_migration(kvm);
1076 case KVM_S390_VM_MIGRATION_STOP:
1077 res = kvm_s390_vm_stop_migration(kvm);
1082 mutex_unlock(&kvm->slots_lock);
1087 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1088 struct kvm_device_attr *attr)
1090 u64 mig = kvm->arch.migration_mode;
1092 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1095 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1100 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1102 struct kvm_s390_vm_tod_clock gtod;
1104 if (copy_from_user(>od, (void __user *)attr->addr, sizeof(gtod)))
1107 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1109 kvm_s390_set_tod_clock(kvm, >od);
1111 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1112 gtod.epoch_idx, gtod.tod);
1117 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1121 if (copy_from_user(>od_high, (void __user *)attr->addr,
1127 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1132 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1134 struct kvm_s390_vm_tod_clock gtod = { 0 };
1136 if (copy_from_user(>od.tod, (void __user *)attr->addr,
1140 kvm_s390_set_tod_clock(kvm, >od);
1141 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1145 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1152 switch (attr->attr) {
1153 case KVM_S390_VM_TOD_EXT:
1154 ret = kvm_s390_set_tod_ext(kvm, attr);
1156 case KVM_S390_VM_TOD_HIGH:
1157 ret = kvm_s390_set_tod_high(kvm, attr);
1159 case KVM_S390_VM_TOD_LOW:
1160 ret = kvm_s390_set_tod_low(kvm, attr);
1169 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1170 struct kvm_s390_vm_tod_clock *gtod)
1172 union tod_clock clk;
1176 store_tod_clock_ext(&clk);
1178 gtod->tod = clk.tod + kvm->arch.epoch;
1179 gtod->epoch_idx = 0;
1180 if (test_kvm_facility(kvm, 139)) {
1181 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1182 if (gtod->tod < clk.tod)
1183 gtod->epoch_idx += 1;
1189 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1191 struct kvm_s390_vm_tod_clock gtod;
1193 memset(>od, 0, sizeof(gtod));
1194 kvm_s390_get_tod_clock(kvm, >od);
1195 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1198 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1199 gtod.epoch_idx, gtod.tod);
1203 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1207 if (copy_to_user((void __user *)attr->addr, >od_high,
1210 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1215 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1219 gtod = kvm_s390_get_tod_clock_fast(kvm);
1220 if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod)))
1222 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1227 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1234 switch (attr->attr) {
1235 case KVM_S390_VM_TOD_EXT:
1236 ret = kvm_s390_get_tod_ext(kvm, attr);
1238 case KVM_S390_VM_TOD_HIGH:
1239 ret = kvm_s390_get_tod_high(kvm, attr);
1241 case KVM_S390_VM_TOD_LOW:
1242 ret = kvm_s390_get_tod_low(kvm, attr);
/*
 * Set the guest CPU model (cpuid, IBC, facility list) from userspace.
 * The requested IBC is clamped into the machine range reported by SCLP
 * (lowest..unblocked); fails once vCPUs exist, under kvm->lock.
 * NOTE(review): lossy extract — error paths, kfree and return are
 * missing here; consult the full file before relying on this text.
 */
1251 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1253 struct kvm_s390_vm_cpu_processor *proc;
1254 u16 lowest_ibc, unblocked_ibc;
1257 mutex_lock(&kvm->lock);
1258 if (kvm->created_vcpus) {
1262 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1267 if (!copy_from_user(proc, (void __user *)attr->addr,
1269 kvm->arch.model.cpuid = proc->cpuid;
/* SCLP encodes lowest IBC in bits 16-27, unblocked IBC in bits 0-11. */
1270 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1271 unblocked_ibc = sclp.ibc & 0xfff;
1272 if (lowest_ibc && proc->ibc) {
1273 if (proc->ibc > unblocked_ibc)
1274 kvm->arch.model.ibc = unblocked_ibc;
1275 else if (proc->ibc < lowest_ibc)
1276 kvm->arch.model.ibc = lowest_ibc;
1278 kvm->arch.model.ibc = proc->ibc;
1280 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1281 S390_ARCH_FAC_LIST_SIZE_BYTE);
1282 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1283 kvm->arch.model.ibc,
1284 kvm->arch.model.cpuid);
1285 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1286 kvm->arch.model.fac_list[0],
1287 kvm->arch.model.fac_list[1],
1288 kvm->arch.model.fac_list[2]);
1293 mutex_unlock(&kvm->lock);
/*
 * Set the guest CPU feature bitmap. The requested features must be a
 * subset of what the host makes available; rejected once vCPUs exist.
 * NOTE(review): lossy extract — early returns and the VM_EVENT argument
 * list are missing here.
 */
1297 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1298 struct kvm_device_attr *attr)
1300 struct kvm_s390_vm_cpu_feat data;
1302 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1304 if (!bitmap_subset((unsigned long *) data.feat,
1305 kvm_s390_available_cpu_feat,
1306 KVM_S390_VM_CPU_FEAT_NR_BITS))
1309 mutex_lock(&kvm->lock);
1310 if (kvm->created_vcpus) {
1311 mutex_unlock(&kvm->lock);
1314 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1315 KVM_S390_VM_CPU_FEAT_NR_BITS);
1316 mutex_unlock(&kvm->lock);
1317 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Copy the per-instruction subfunction masks (CPACF query results the
 * guest is allowed to see) from userspace into kvm->arch.model.subfuncs,
 * then trace each mask. Rejected once vCPUs exist, under kvm->lock.
 * NOTE(review): lossy extract — error returns are missing here.
 */
1324 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1325 struct kvm_device_attr *attr)
1327 mutex_lock(&kvm->lock);
1328 if (kvm->created_vcpus) {
1329 mutex_unlock(&kvm->lock);
1333 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1334 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1335 mutex_unlock(&kvm->lock);
1338 mutex_unlock(&kvm->lock);
/* One trace line per subfunction mask, dumped as raw 64-bit words. */
1340 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1341 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1342 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1343 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1344 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1345 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1346 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1347 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1348 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1349 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1350 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1351 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1352 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1353 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1354 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1355 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1356 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1357 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1358 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1359 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1360 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1361 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1362 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1363 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1364 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1365 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1366 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1367 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1368 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1369 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1370 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1371 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1372 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1373 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1374 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1375 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1376 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1377 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1378 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1379 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1380 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1381 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1382 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1383 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1384 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1385 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1386 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1387 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1388 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1389 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1390 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1391 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1392 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1393 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1394 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1395 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1396 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1401 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1405 switch (attr->attr) {
1406 case KVM_S390_VM_CPU_PROCESSOR:
1407 ret = kvm_s390_set_processor(kvm, attr);
1409 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1410 ret = kvm_s390_set_processor_feat(kvm, attr);
1412 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1413 ret = kvm_s390_set_processor_subfunc(kvm, attr);
/*
 * Return the currently configured guest CPU model (cpuid, IBC, facility
 * list) to userspace via a temporary kernel buffer.
 * NOTE(review): lossy extract — allocation-failure handling, kfree and
 * return are missing here.
 */
1419 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1421 struct kvm_s390_vm_cpu_processor *proc;
1424 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1429 proc->cpuid = kvm->arch.model.cpuid;
1430 proc->ibc = kvm->arch.model.ibc;
1431 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1432 S390_ARCH_FAC_LIST_SIZE_BYTE);
1433 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1434 kvm->arch.model.ibc,
1435 kvm->arch.model.cpuid);
1436 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1437 kvm->arch.model.fac_list[0],
1438 kvm->arch.model.fac_list[1],
1439 kvm->arch.model.fac_list[2]);
1440 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/*
 * Return the host machine's CPU model to userspace: real cpuid, raw
 * SCLP IBC word, the VM's facility mask and the host's full STFLE
 * facility list.
 * NOTE(review): lossy extract — error handling, the VM_EVENT argument
 * lists and the return are missing here.
 */
1447 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1449 struct kvm_s390_vm_cpu_machine *mach;
1452 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1457 get_cpu_id((struct cpuid *) &mach->cpuid);
1458 mach->ibc = sclp.ibc;
1459 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1460 S390_ARCH_FAC_LIST_SIZE_BYTE);
1461 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1462 sizeof(S390_lowcore.stfle_fac_list));
1463 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1464 kvm->arch.model.ibc,
1465 kvm->arch.model.cpuid);
1466 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1470 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1474 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
/*
 * Return the guest's configured CPU feature bitmap to userspace.
 * NOTE(review): lossy extract — early return and the VM_EVENT argument
 * list are missing here.
 */
1481 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1482 struct kvm_device_attr *attr)
1484 struct kvm_s390_vm_cpu_feat data;
1486 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1487 KVM_S390_VM_CPU_FEAT_NR_BITS);
1488 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1490 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Return the host-supported CPU feature bitmap (upper bound for what a
 * guest can be given) to userspace.
 * NOTE(review): lossy extract — early return and the VM_EVENT argument
 * list are missing here.
 */
1497 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1498 struct kvm_device_attr *attr)
1500 struct kvm_s390_vm_cpu_feat data;
1502 bitmap_copy((unsigned long *) data.feat,
1503 kvm_s390_available_cpu_feat,
1504 KVM_S390_VM_CPU_FEAT_NR_BITS);
1505 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1507 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Return the guest's configured CPACF subfunction masks to userspace
 * and trace each mask. Mirrors the SET variant above.
 * NOTE(review): lossy extract — the -EFAULT and final return are
 * missing here.
 */
1514 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1515 struct kvm_device_attr *attr)
1517 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1518 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1521 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1522 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1523 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1524 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1525 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1526 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1527 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1528 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1529 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1530 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1531 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1532 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1533 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1534 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1535 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1536 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1537 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1538 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1539 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1540 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1541 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1542 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1543 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1544 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1545 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1546 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1547 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1548 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1549 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1550 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1551 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1552 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1553 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1556 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1557 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1558 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1559 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1560 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1561 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1562 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1563 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1564 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1565 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1566 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1567 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1568 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1570 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1571 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1572 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1573 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1575 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1576 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1577 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
/*
 * Return the host-available CPACF subfunction masks
 * (kvm_s390_available_subfunc) to userspace and trace each mask.
 * NOTE(review): lossy extract — the -EFAULT and final return are
 * missing here.
 */
1582 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1583 struct kvm_device_attr *attr)
1585 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1586 sizeof(struct kvm_s390_vm_cpu_subfunc)))
1589 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1591 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1592 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1593 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1594 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1595 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1596 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1597 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1598 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1599 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1600 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1601 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1602 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1603 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1604 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1605 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1606 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1607 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1608 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1609 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1610 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1611 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1612 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1613 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1614 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1615 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1616 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1617 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1618 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1619 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1620 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1621 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1622 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1623 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1624 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1625 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1626 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1627 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1628 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1629 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1630 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1631 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1632 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1633 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1634 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1635 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1636 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1637 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1638 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1639 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1640 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1641 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1642 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1643 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1644 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1645 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1650 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1654 switch (attr->attr) {
1655 case KVM_S390_VM_CPU_PROCESSOR:
1656 ret = kvm_s390_get_processor(kvm, attr);
1658 case KVM_S390_VM_CPU_MACHINE:
1659 ret = kvm_s390_get_machine(kvm, attr);
1661 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1662 ret = kvm_s390_get_processor_feat(kvm, attr);
1664 case KVM_S390_VM_CPU_MACHINE_FEAT:
1665 ret = kvm_s390_get_machine_feat(kvm, attr);
1667 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1668 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1670 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1671 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1677 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1681 switch (attr->group) {
1682 case KVM_S390_VM_MEM_CTRL:
1683 ret = kvm_s390_set_mem_control(kvm, attr);
1685 case KVM_S390_VM_TOD:
1686 ret = kvm_s390_set_tod(kvm, attr);
1688 case KVM_S390_VM_CPU_MODEL:
1689 ret = kvm_s390_set_cpu_model(kvm, attr);
1691 case KVM_S390_VM_CRYPTO:
1692 ret = kvm_s390_vm_set_crypto(kvm, attr);
1694 case KVM_S390_VM_MIGRATION:
1695 ret = kvm_s390_vm_set_migration(kvm, attr);
1705 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1709 switch (attr->group) {
1710 case KVM_S390_VM_MEM_CTRL:
1711 ret = kvm_s390_get_mem_control(kvm, attr);
1713 case KVM_S390_VM_TOD:
1714 ret = kvm_s390_get_tod(kvm, attr);
1716 case KVM_S390_VM_CPU_MODEL:
1717 ret = kvm_s390_get_cpu_model(kvm, attr);
1719 case KVM_S390_VM_MIGRATION:
1720 ret = kvm_s390_vm_get_migration(kvm, attr);
/*
 * KVM_HAS_DEVICE_ATTR: report which VM attribute groups/attributes are
 * supported on this host (0 = supported, -ENXIO = not). CMMA attributes
 * depend on sclp.has_cmma; APIE crypto attributes depend on the AP
 * instructions being available.
 * NOTE(review): lossy extract — break statements, several case bodies
 * and the final return are missing here.
 */
1730 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1734 switch (attr->group) {
1735 case KVM_S390_VM_MEM_CTRL:
1736 switch (attr->attr) {
1737 case KVM_S390_VM_MEM_ENABLE_CMMA:
1738 case KVM_S390_VM_MEM_CLR_CMMA:
1739 ret = sclp.has_cmma ? 0 : -ENXIO;
1741 case KVM_S390_VM_MEM_LIMIT_SIZE:
1749 case KVM_S390_VM_TOD:
1750 switch (attr->attr) {
1751 case KVM_S390_VM_TOD_LOW:
1752 case KVM_S390_VM_TOD_HIGH:
1760 case KVM_S390_VM_CPU_MODEL:
1761 switch (attr->attr) {
1762 case KVM_S390_VM_CPU_PROCESSOR:
1763 case KVM_S390_VM_CPU_MACHINE:
1764 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1765 case KVM_S390_VM_CPU_MACHINE_FEAT:
1766 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1767 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1775 case KVM_S390_VM_CRYPTO:
1776 switch (attr->attr) {
1777 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1778 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1779 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1780 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1783 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1784 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1785 ret = ap_instructions_available() ? 0 : -ENXIO;
1792 case KVM_S390_VM_MIGRATION:
/*
 * KVM_S390_GET_SKEYS: read up to KVM_S390_SKEYS_MAX storage keys for
 * guest frames starting at args->start_gfn into a kernel buffer, then
 * copy them to userspace. Returns KVM_S390_GET_SKEYS_NONE when the
 * guest mm does not use storage keys at all.
 * NOTE(review): lossy extract — error returns, loop break and kvfree
 * are missing here.
 */
1803 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1807 int srcu_idx, i, r = 0;
1809 if (args->flags != 0)
1812 /* Is this guest using storage keys? */
1813 if (!mm_uses_skeys(current->mm))
1814 return KVM_S390_GET_SKEYS_NONE;
1816 /* Enforce sane limit on memory allocation */
1817 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1820 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
/* Keys are read under mmap read lock + kvm->srcu for memslot lookups. */
1824 mmap_read_lock(current->mm);
1825 srcu_idx = srcu_read_lock(&kvm->srcu);
1826 for (i = 0; i < args->count; i++) {
1827 hva = gfn_to_hva(kvm, args->start_gfn + i);
1828 if (kvm_is_error_hva(hva)) {
1833 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1837 srcu_read_unlock(&kvm->srcu, srcu_idx);
1838 mmap_read_unlock(current->mm);
1841 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1842 sizeof(uint8_t) * args->count);
/*
 * KVM_S390_SET_SKEYS: copy storage keys from userspace and apply them
 * to guest frames starting at args->start_gfn, enabling storage-key
 * handling (s390_enable_skey) first. On fault the page is fixed up via
 * fixup_user_fault and the key set is retried.
 * NOTE(review): lossy extract — error returns, the retry logic around
 * fixup_user_fault and kvfree are missing here.
 */
1851 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1855 int srcu_idx, i, r = 0;
1858 if (args->flags != 0)
1861 /* Enforce sane limit on memory allocation */
1862 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1865 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1869 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1870 sizeof(uint8_t) * args->count);
1876 /* Enable storage key handling for the guest */
1877 r = s390_enable_skey();
1882 mmap_read_lock(current->mm);
1883 srcu_idx = srcu_read_lock(&kvm->srcu);
1884 while (i < args->count) {
1886 hva = gfn_to_hva(kvm, args->start_gfn + i);
1887 if (kvm_is_error_hva(hva)) {
1892 /* Lowest order bit is reserved */
1893 if (keys[i] & 0x01) {
1898 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1900 r = fixup_user_fault(current->mm, hva,
1901 FAULT_FLAG_WRITE, &unlocked);
1908 srcu_read_unlock(&kvm->srcu, srcu_idx);
1909 mmap_read_unlock(current->mm);
1916 * Base address and length must be sent at the start of each block, therefore
1917 * it's cheaper to send some clean data, as long as it's less than the size of
1920 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1921 /* for consistency */
1922 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1925 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1926 * address falls in a hole. In that case the index of one of the memslots
1927 * bordering the hole is returned.
/*
 * Binary-search the memslot array for gfn, like gfn_to_memslot, but if
 * gfn falls into a hole return the index of a bordering slot instead of
 * failing. The LRU slot is checked first as a fast path and updated on
 * an exact hit.
 * NOTE(review): lossy extract — the fast-path return, binary-search
 * else branch and final return are missing here.
 */
1929 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1931 int start = 0, end = slots->used_slots;
1932 int slot = atomic_read(&slots->lru_slot);
1933 struct kvm_memory_slot *memslots = slots->memslots;
1935 if (gfn >= memslots[slot].base_gfn &&
1936 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1939 while (start < end) {
1940 slot = start + (end - start) / 2;
1942 if (gfn >= memslots[slot].base_gfn)
1948 if (start >= slots->used_slots)
1949 return slots->used_slots - 1;
1951 if (gfn >= memslots[start].base_gfn &&
1952 gfn < memslots[start].base_gfn + memslots[start].npages) {
1953 atomic_set(&slots->lru_slot, start);
/*
 * CMMA "peek" mode: read PGSTE usage bits for consecutive gfns starting
 * at args->start_gfn into res, without consuming dirty bits. Stops at
 * the first invalid hva/pgste; an error is only reported if nothing was
 * copied yet.
 * NOTE(review): lossy extract — the loop increment and return are
 * missing here.
 */
1959 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1960 u8 *res, unsigned long bufsize)
1962 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1965 while (args->count < bufsize) {
1966 hva = gfn_to_hva(kvm, cur_gfn);
1968 * We return an error if the first value was invalid, but we
1969 * return successfully if at least one value was copied.
1971 if (kvm_is_error_hva(hva))
1972 return args->count ? 0 : -EFAULT;
1973 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
/* Keep only the usage state and NODAT bits of the PGSTE. */
1975 res[args->count++] = (pgstev >> 24) & 0x43;
/*
 * Find the next gfn (at or after cur_gfn) whose CMMA dirty bit is set
 * in the per-memslot second dirty bitmap, scanning across memslots and
 * wrapping at the highest slot.
 * NOTE(review): lossy extract — slot index adjustments inside the
 * branches are missing here.
 */
1982 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1983 unsigned long cur_gfn)
1985 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1986 struct kvm_memory_slot *ms = slots->memslots + slotidx;
1987 unsigned long ofs = cur_gfn - ms->base_gfn;
1989 if (ms->base_gfn + ms->npages <= cur_gfn) {
1991 /* If we are above the highest slot, wrap around */
1993 slotidx = slots->used_slots - 1;
1995 ms = slots->memslots + slotidx;
1998 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1999 while ((slotidx > 0) && (ofs >= ms->npages)) {
2001 ms = slots->memslots + slotidx;
2002 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2004 return ms->base_gfn + ofs;
/*
 * CMMA migration mode: starting at the next dirty gfn, clear dirty bits
 * and copy PGSTE usage values into res, stopping when the buffer is
 * full, the end of memory is reached, or the next dirty bit is farther
 * than KVM_S390_MAX_BIT_DISTANCE away (so only short clean runs are
 * transmitted).
 * NOTE(review): lossy extract — loop increments, break statements and
 * the return are missing here.
 */
2007 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2008 u8 *res, unsigned long bufsize)
2010 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2011 struct kvm_memslots *slots = kvm_memslots(kvm);
2012 struct kvm_memory_slot *ms;
2014 if (unlikely(!slots->used_slots))
2017 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2018 ms = gfn_to_memslot(kvm, cur_gfn);
2020 args->start_gfn = cur_gfn;
2023 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
/* memslots are sorted descending; slot 0 holds the highest gfns. */
2024 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2026 while (args->count < bufsize) {
2027 hva = gfn_to_hva(kvm, cur_gfn);
2028 if (kvm_is_error_hva(hva))
2030 /* Decrement only if we actually flipped the bit to 0 */
2031 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2032 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2033 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2035 /* Save the value */
2036 res[args->count++] = (pgstev >> 24) & 0x43;
2037 /* If the next bit is too far away, stop. */
2038 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2040 /* If we reached the previous "next", find the next one */
2041 if (cur_gfn == next_gfn)
2042 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2043 /* Reached the end of memory or of the buffer, stop */
2044 if ((next_gfn >= mem_end) ||
2045 (next_gfn - args->start_gfn >= bufsize))
2048 /* Reached the end of the current memslot, take the next one. */
2049 if (cur_gfn - ms->base_gfn >= ms->npages) {
2050 ms = gfn_to_memslot(kvm, cur_gfn)
2059 * This function searches for the next page with dirty CMMA attributes, and
2060 * saves the attributes in the buffer up to either the end of the buffer or
2061 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2062 * no trailing clean bytes are saved.
2063 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2064 * output buffer will indicate 0 as length.
/*
 * KVM_S390_GET_CMMA_BITS: fetch CMMA attribute values, either by
 * peeking (KVM_S390_CMMA_PEEK) or by consuming dirty bits in migration
 * mode. Returns an empty result when CMMA is off/unused or nothing is
 * dirty; otherwise fills a vmalloc'ed buffer and copies it out along
 * with the remaining dirty-page count.
 * NOTE(review): lossy extract — error returns, vfree and the final
 * return are missing here.
 */
2066 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2067 struct kvm_s390_cmma_log *args)
2069 unsigned long bufsize;
2070 int srcu_idx, peek, ret;
2073 if (!kvm->arch.use_cmma)
2075 /* Invalid/unsupported flags were specified */
2076 if (args->flags & ~KVM_S390_CMMA_PEEK)
2078 /* Migration mode query, and we are not doing a migration */
2079 peek = !!(args->flags & KVM_S390_CMMA_PEEK)
2080 if (!peek && !kvm->arch.migration_mode)
2082 /* CMMA is disabled or was not used, or the buffer has length zero */
2083 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2084 if (!bufsize || !kvm->mm->context.uses_cmm) {
2085 memset(args, 0, sizeof(*args));
2088 /* We are not peeking, and there are no dirty pages */
2089 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2090 memset(args, 0, sizeof(*args));
2094 values = vmalloc(bufsize);
2098 mmap_read_lock(kvm->mm);
2099 srcu_idx = srcu_read_lock(&kvm->srcu);
2101 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2103 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2104 srcu_read_unlock(&kvm->srcu, srcu_idx);
2105 mmap_read_unlock(kvm->mm);
2107 if (kvm->arch.migration_mode)
2108 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2110 args->remaining = 0;
2112 if (copy_to_user((void __user *)args->values, values, args->count))
2120 * This function sets the CMMA attributes for the given pages. If the input
2121 * buffer has zero length, no action is taken, otherwise the attributes are
2122 * set and the mm->context.uses_cmm flag is set.
/*
 * KVM_S390_SET_CMMA_BITS: apply userspace-provided CMMA attribute
 * values to guest pages starting at args->start_gfn, then mark the mm
 * as using CMM. Zero-length input is a no-op.
 * NOTE(review): lossy extract — error returns, the per-page
 * pgstev/mask setup and vfree are missing here.
 */
2124 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2125 const struct kvm_s390_cmma_log *args)
2127 unsigned long hva, mask, pgstev, i;
2129 int srcu_idx, r = 0;
2133 if (!kvm->arch.use_cmma)
2135 /* invalid/unsupported flags */
2136 if (args->flags != 0)
2138 /* Enforce sane limit on memory allocation */
2139 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2142 if (args->count == 0)
2145 bits = vmalloc(array_size(sizeof(*bits), args->count));
2149 r = copy_from_user(bits, (void __user *)args->values, args->count);
2155 mmap_read_lock(kvm->mm);
2156 srcu_idx = srcu_read_lock(&kvm->srcu);
2157 for (i = 0; i < args->count; i++) {
2158 hva = gfn_to_hva(kvm, args->start_gfn + i);
2159 if (kvm_is_error_hva(hva)) {
/* Shift the value into PGSTE position, keep only writable bits. */
2165 pgstev = pgstev << 24;
2166 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2167 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2169 srcu_read_unlock(&kvm->srcu, srcu_idx);
2170 mmap_read_unlock(kvm->mm);
/* First successful set turns on the uses_cmm flag for this mm. */
2172 if (!kvm->mm->context.uses_cmm) {
2173 mmap_write_lock(kvm->mm);
2174 kvm->mm->context.uses_cmm = 1;
2175 mmap_write_unlock(kvm->mm);
/*
 * Convert all vCPUs back from protected (PV) mode. Destruction failures
 * are not fatal per-CPU — as many CPUs as possible are destroyed — but
 * the rc/rrc of the first failure are reported to the caller.
 * NOTE(review): lossy extract — the rc/rrc capture and return are
 * missing here.
 */
2182 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2184 struct kvm_vcpu *vcpu;
2190 * We ignore failures and try to destroy as many CPUs as possible.
2191 * At the same time we must not free the assigned resources when
2192 * this fails, as the ultravisor has still access to that memory.
2193 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2195 * We want to return the first failure rc and rrc, though.
2197 kvm_for_each_vcpu(i, vcpu, kvm) {
2198 mutex_lock(&vcpu->mutex);
2199 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2204 mutex_unlock(&vcpu->mutex);
/*
 * Convert all vCPUs to protected (PV) mode. On any per-CPU failure the
 * already-converted CPUs are rolled back via kvm_s390_cpus_from_pv.
 * NOTE(review): lossy extract — the failure break and return are
 * missing here.
 */
2209 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2214 struct kvm_vcpu *vcpu;
2216 kvm_for_each_vcpu(i, vcpu, kvm) {
2217 mutex_lock(&vcpu->mutex);
2218 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2219 mutex_unlock(&vcpu->mutex);
2224 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
/*
 * KVM_S390_PV_COMMAND handler: lifecycle operations for protected
 * (Ultravisor-backed) VMs — enable/disable protection, set secure
 * parameters, unpack the encrypted image, and issue verify/reset/
 * unshare UV calls. rc/rrc from the Ultravisor are returned in cmd.
 * NOTE(review): lossy extract — error returns, break statements, vfree
 * and the default case are missing here.
 */
2228 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2232 void __user *argp = (void __user *)cmd->data;
2235 case KVM_PV_ENABLE: {
2237 if (kvm_s390_pv_is_protected(kvm))
2241 * FMT 4 SIE needs esca. As we never switch back to bsca from
2242 * esca, we need no cleanup in the error cases below
2244 r = sca_switch_to_extended(kvm);
2248 mmap_write_lock(current->mm);
2249 r = gmap_mark_unmergeable();
2250 mmap_write_unlock(current->mm);
2254 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2258 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2260 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2262 /* we need to block service interrupts from now on */
2263 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2266 case KVM_PV_DISABLE: {
2268 if (!kvm_s390_pv_is_protected(kvm))
2271 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2273 * If a CPU could not be destroyed, destroy VM will also fail.
2274 * There is no point in trying to destroy it. Instead return
2275 * the rc and rrc from the first CPU that failed destroying.
2279 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2281 /* no need to block service interrupts any more */
2282 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2285 case KVM_PV_SET_SEC_PARMS: {
2286 struct kvm_s390_pv_sec_parm parms = {};
2290 if (!kvm_s390_pv_is_protected(kvm))
2294 if (copy_from_user(&parms, argp, sizeof(parms)))
2297 /* Currently restricted to 8KB */
2299 if (parms.length > PAGE_SIZE * 2)
2303 hdr = vmalloc(parms.length);
2308 if (!copy_from_user(hdr, (void __user *)parms.origin,
2310 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2311 &cmd->rc, &cmd->rrc);
2316 case KVM_PV_UNPACK: {
2317 struct kvm_s390_pv_unp unp = {};
2320 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2324 if (copy_from_user(&unp, argp, sizeof(unp)))
2327 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2328 &cmd->rc, &cmd->rrc);
2331 case KVM_PV_VERIFY: {
2333 if (!kvm_s390_pv_is_protected(kvm))
2336 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2337 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2338 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2342 case KVM_PV_PREP_RESET: {
2344 if (!kvm_s390_pv_is_protected(kvm))
2347 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2348 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2349 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2353 case KVM_PV_UNSHARE_ALL: {
2355 if (!kvm_s390_pv_is_protected(kvm))
2358 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2359 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2360 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
/*
 * kvm_arch_vm_ioctl() - arch-specific dispatcher for VM-scoped ioctls.
 * NOTE(review): this chunk is a lossy extraction; the switch statement,
 * error returns and break/closing lines are not visible here. Comments
 * below describe only what the visible statements show.
 */
2370 long kvm_arch_vm_ioctl(struct file *filp,
2371 unsigned int ioctl, unsigned long arg)
2373 struct kvm *kvm = filp->private_data;
2374 void __user *argp = (void __user *)arg;
2375 struct kvm_device_attr attr;
/* Inject a floating (VM-wide) interrupt supplied by userspace. */
2379 case KVM_S390_INTERRUPT: {
2380 struct kvm_s390_interrupt s390int;
2383 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2385 r = kvm_s390_inject_vm(kvm, &s390int);
/* IRQ routing is only set up when the in-kernel irqchip is in use. */
2388 case KVM_CREATE_IRQCHIP: {
2389 struct kvm_irq_routing_entry routing;
2392 if (kvm->arch.use_irqchip) {
2393 /* Set up dummy routing. */
2394 memset(&routing, 0, sizeof(routing));
2395 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
/* Set/get/query VM device attributes (copied in via the shared attr). */
2399 case KVM_SET_DEVICE_ATTR: {
2401 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2403 r = kvm_s390_vm_set_attr(kvm, &attr);
2406 case KVM_GET_DEVICE_ATTR: {
2408 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2410 r = kvm_s390_vm_get_attr(kvm, &attr);
2413 case KVM_HAS_DEVICE_ATTR: {
2415 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2417 r = kvm_s390_vm_has_attr(kvm, &attr);
/* Storage-key export/import for migration. */
2420 case KVM_S390_GET_SKEYS: {
2421 struct kvm_s390_skeys args;
2424 if (copy_from_user(&args, argp,
2425 sizeof(struct kvm_s390_skeys)))
2427 r = kvm_s390_get_skeys(kvm, &args);
2430 case KVM_S390_SET_SKEYS: {
2431 struct kvm_s390_skeys args;
2434 if (copy_from_user(&args, argp,
2435 sizeof(struct kvm_s390_skeys)))
2437 r = kvm_s390_set_skeys(kvm, &args);
/* CMMA dirty-page bitmaps; memslots are pinned via slots_lock. */
2440 case KVM_S390_GET_CMMA_BITS: {
2441 struct kvm_s390_cmma_log args;
2444 if (copy_from_user(&args, argp, sizeof(args)))
2446 mutex_lock(&kvm->slots_lock);
2447 r = kvm_s390_get_cmma_bits(kvm, &args);
2448 mutex_unlock(&kvm->slots_lock);
2450 r = copy_to_user(argp, &args, sizeof(args));
2456 case KVM_S390_SET_CMMA_BITS: {
2457 struct kvm_s390_cmma_log args;
2460 if (copy_from_user(&args, argp, sizeof(args)))
2462 mutex_lock(&kvm->slots_lock);
2463 r = kvm_s390_set_cmma_bits(kvm, &args);
2464 mutex_unlock(&kvm->slots_lock);
/* Protected-virtualization commands; rejected on non-protvirt hosts. */
2467 case KVM_S390_PV_COMMAND: {
2468 struct kvm_pv_cmd args;
2470 /* protvirt means user sigp */
2471 kvm->arch.user_cpu_state_ctrl = 1;
2473 if (!is_prot_virt_host()) {
2477 if (copy_from_user(&args, argp, sizeof(args))) {
2485 mutex_lock(&kvm->lock);
2486 r = kvm_s390_handle_pv(kvm, &args);
2487 mutex_unlock(&kvm->lock);
/* rc/rrc in args are reported back to userspace. */
2488 if (copy_to_user(argp, &args, sizeof(args))) {
/*
 * Query whether the AP extended addressing (APXA) facility is installed,
 * using the AP Query Configuration Information instruction.
 * NOTE(review): the return statements are elided from this view —
 * presumably info.apxa on QCI success and 0 otherwise; confirm upstream.
 */
2501 static int kvm_s390_apxa_installed(void)
2503 struct ap_config_info info;
2505 if (ap_instructions_available()) {
2506 if (ap_qci(&info) == 0)
2514 * The format of the crypto control block (CRYCB) is specified in the 3 low
2515 * order bits of the CRYCB designation (CRYCBD) field as follows:
2516 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2517 * AP extended addressing (APXA) facility are installed.
2518 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2519 * Format 2: Both the APXA and MSAX3 facilities are installed
/*
 * Select the CRYCB format (see the format description above) and encode
 * it in the low-order bits of the CRYCB designation, which also carries
 * the CRYCB address.
 */
2521 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2523 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2525 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2526 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2528 /* Check whether MSAX3 is installed */
2529 if (!test_kvm_facility(kvm, 76))
/* MSAX3 present: format 2 with APXA, otherwise format 1. */
2532 if (kvm_s390_apxa_installed())
2533 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2535 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
/*
 * Install the AP matrix masks (adapters, queues, domains) into the CRYCB
 * while all vcpus are blocked, then force a vSIE shadow CRYCB rebuild.
 * Mask sizes depend on the CRYCB format: APCB1 (format 2) uses 256-bit
 * masks, APCB0 uses a 64-bit apm and 16-bit aqm/adm.
 */
2538 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2539 unsigned long *aqm, unsigned long *adm)
2541 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2543 mutex_lock(&kvm->lock);
2544 kvm_s390_vcpu_block_all(kvm);
2546 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2547 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2548 memcpy(crycb->apcb1.apm, apm, 32);
2549 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2550 apm[0], apm[1], apm[2], apm[3]);
2551 memcpy(crycb->apcb1.aqm, aqm, 32);
2552 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2553 aqm[0], aqm[1], aqm[2], aqm[3]);
2554 memcpy(crycb->apcb1.adm, adm, 32);
2555 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2556 adm[0], adm[1], adm[2], adm[3]);
/* NOTE(review): the CRYCB_FORMAT1 case label is elided from this view. */
2559 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2560 memcpy(crycb->apcb0.apm, apm, 8);
2561 memcpy(crycb->apcb0.aqm, aqm, 2);
2562 memcpy(crycb->apcb0.adm, adm, 2);
2563 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2564 apm[0], *((unsigned short *)aqm),
2565 *((unsigned short *)adm));
2567 default: /* Can not happen */
2571 /* recreate the shadow crycb for each vcpu */
2572 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2573 kvm_s390_vcpu_unblock_all(kvm);
2574 mutex_unlock(&kvm->lock);
2576 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
/*
 * Zero both APCB variants of the CRYCB (revoking all AP adapters, queues
 * and domains) while all vcpus are blocked, then force a vSIE shadow
 * CRYCB rebuild.
 */
2578 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2580 mutex_lock(&kvm->lock);
2581 kvm_s390_vcpu_block_all(kvm);
2583 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2584 sizeof(kvm->arch.crypto.crycb->apcb0));
2585 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2586 sizeof(kvm->arch.crypto.crycb->apcb1));
2588 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2589 /* recreate the shadow crycb for each vcpu */
2590 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2591 kvm_s390_vcpu_unblock_all(kvm);
2592 mutex_unlock(&kvm->lock);
2594 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
/*
 * Build the initial guest CPU id with the version field forced to 0xff.
 * NOTE(review): the cpuid declaration/read is elided from this view —
 * presumably obtained from the host; confirm against full source.
 */
2596 static u64 kvm_s390_get_initial_cpuid(void)
2601 cpuid.version = 0xff;
2602 return *((u64 *) &cpuid);
/*
 * Initialize the per-VM crypto state: point the CRYCB at the sie_page2
 * slot, pick the CRYCB format, and (when MSAX3/facility 76 is available)
 * enable protected-key AES/DEA with freshly generated wrapping key masks.
 */
2605 static void kvm_s390_crypto_init(struct kvm *kvm)
2607 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2608 kvm_s390_set_crycb_format(kvm);
/* Without MSAX3 there are no wrapping keys to set up. */
2610 if (!test_kvm_facility(kvm, 76))
2613 /* Enable AES/DEA protected key functions by default */
2614 kvm->arch.crypto.aes_kw = 1;
2615 kvm->arch.crypto.dea_kw = 1;
2616 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2617 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2618 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2619 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
/*
 * Free the system control area; an extended SCA spans multiple pages
 * (allocated with alloc_pages_exact), a basic SCA is a single page.
 */
2622 static void sca_dispose(struct kvm *kvm)
2624 if (kvm->arch.use_esca)
2625 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block))
2627 free_page((unsigned long)(kvm->arch.sca));
2628 kvm->arch.sca = NULL;
/*
 * kvm_arch_init_vm() - arch setup when a VM is created.
 * Allocates the SCA and sie_page2, computes the facility mask/list,
 * initializes crypto, floating-interrupt state, the guest address space
 * (gmap) for non-ucontrol VMs, vSIE and GISA state.
 * NOTE(review): error labels and several returns are elided from view.
 */
2631 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2633 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2635 char debug_name[16];
2636 static unsigned long sca_offset;
/* Only privileged userspace may create user-controlled VMs. */
2639 #ifdef CONFIG_KVM_S390_UCONTROL
2640 if (type & ~KVM_VM_S390_UCONTROL)
2642 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2649 rc = s390_enable_sie();
/* Without 64-bit SCA origin support the SCA must live below 2GB. */
2655 if (!sclp.has_64bscao)
2656 alloc_flags |= GFP_DMA;
2657 rwlock_init(&kvm->arch.sca_lock);
2658 /* start with basic SCA */
2659 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
/* Stagger SCAs within the page to spread cache-line usage. */
2662 mutex_lock(&kvm_lock);
2664 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2666 kvm->arch.sca = (struct bsca_block *)
2667 ((char *) kvm->arch.sca + sca_offset);
2668 mutex_unlock(&kvm_lock);
2670 sprintf(debug_name, "kvm-%u", current->pid);
2672 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2676 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2677 kvm->arch.sie_page2 =
2678 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2679 if (!kvm->arch.sie_page2)
2682 kvm->arch.sie_page2->kvm = kvm;
2683 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
/* Host facilities, restricted to what KVM supports (base + extended). */
2685 for (i = 0; i < kvm_s390_fac_size(); i++) {
2686 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2687 (kvm_s390_fac_base[i] |
2688 kvm_s390_fac_ext[i]);
2689 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2690 kvm_s390_fac_base[i];
2692 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2694 /* we are always in czam mode - even on pre z14 machines */
2695 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2696 set_kvm_facility(kvm->arch.model.fac_list, 138);
2697 /* we emulate STHYI in kvm */
2698 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2699 set_kvm_facility(kvm->arch.model.fac_list, 74);
2700 if (MACHINE_HAS_TLB_GUEST) {
2701 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2702 set_kvm_facility(kvm->arch.model.fac_list, 147);
/* Adapter-interruption virtualization needs both css support and fac 65. */
2705 if (css_general_characteristics.aiv && test_facility(65))
2706 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2708 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2709 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2711 kvm_s390_crypto_init(kvm);
2713 mutex_init(&kvm->arch.float_int.ais_lock);
2714 spin_lock_init(&kvm->arch.float_int.lock);
2715 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2716 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2717 init_waitqueue_head(&kvm->arch.ipte_wq);
2718 mutex_init(&kvm->arch.ipte_mutex);
2720 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2721 VM_EVENT(kvm, 3, "vm created with type %lu", type);
/* ucontrol VMs manage their own gmaps per vcpu; no VM-wide gmap. */
2723 if (type & KVM_VM_S390_UCONTROL) {
2724 kvm->arch.gmap = NULL;
2725 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2727 if (sclp.hamax == U64_MAX)
2728 kvm->arch.mem_limit = TASK_SIZE_MAX;
2730 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2732 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2733 if (!kvm->arch.gmap)
2735 kvm->arch.gmap->private = kvm;
2736 kvm->arch.gmap->pfault_enabled = 0;
2739 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2740 kvm->arch.use_skf = sclp.has_skey;
2741 spin_lock_init(&kvm->arch.start_stop_lock);
2742 kvm_s390_vsie_init(kvm);
2744 kvm_s390_gisa_init(kvm);
2745 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
/* NOTE(review): error-path labels elided; cleanup in reverse order. */
2749 free_page((unsigned long)kvm->arch.sie_page2);
2750 debug_unregister(kvm->arch.dbf);
2752 KVM_EVENT(3, "creation of vm failed: %d", rc);
/*
 * Tear down a vcpu: local irqs, async-pf queue, SCA entry or private
 * gmap (ucontrol), CMMA buffer, protected-cpu state, and the SIE block.
 */
2756 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2760 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2761 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2762 kvm_s390_clear_local_irqs(vcpu);
2763 kvm_clear_async_pf_completion_queue(vcpu);
/* Regular VMs have an SCA entry; ucontrol vcpus own a private gmap. */
2764 if (!kvm_is_ucontrol(vcpu->kvm))
2767 if (kvm_is_ucontrol(vcpu->kvm))
2768 gmap_remove(vcpu->arch.gmap);
2770 if (vcpu->kvm->arch.use_cmma)
2771 kvm_s390_vcpu_unsetup_cmma(vcpu);
2772 /* We can not hold the vcpu mutex here, we are already dying */
2773 if (kvm_s390_pv_cpu_get_handle(vcpu))
2774 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2775 free_page((unsigned long)(vcpu->arch.sie_block));
/*
 * Destroy every vcpu of the VM and clear the vcpu array under kvm->lock,
 * finally publishing an online_vcpus count of zero.
 */
2778 static void kvm_free_vcpus(struct kvm *kvm)
2781 struct kvm_vcpu *vcpu;
2783 kvm_for_each_vcpu(i, vcpu, kvm)
2784 kvm_vcpu_destroy(vcpu);
2786 mutex_lock(&kvm->lock);
2787 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2788 kvm->vcpus[i] = NULL;
2790 atomic_set(&kvm->online_vcpus, 0);
2791 mutex_unlock(&kvm->lock);
/*
 * Arch teardown when the VM is destroyed: free vcpus, GISA, protected-VM
 * state, debug feature, sie_page2, gmap, adapters, floating irqs, vSIE.
 */
2794 void kvm_arch_destroy_vm(struct kvm *kvm)
2798 kvm_free_vcpus(kvm);
2800 kvm_s390_gisa_destroy(kvm);
2802 * We are already at the end of life and kvm->lock is not taken.
2803 * This is ok as the file descriptor is closed by now and nobody
2804 * can mess with the pv state. To avoid lockdep_assert_held from
2805 * complaining we do not use kvm_s390_pv_is_protected.
2807 if (kvm_s390_pv_get_handle(kvm))
2808 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2809 debug_unregister(kvm->arch.dbf);
2810 free_page((unsigned long)kvm->arch.sie_page2);
2811 if (!kvm_is_ucontrol(kvm))
2812 gmap_remove(kvm->arch.gmap);
2813 kvm_s390_destroy_adapters(kvm);
2814 kvm_s390_clear_float_irqs(kvm);
2815 kvm_s390_vsie_destroy(kvm);
2816 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2819 /* Section: vcpu related */
/*
 * For user-controlled VMs each vcpu owns a private gmap over the whole
 * address space. NOTE(review): the -ENOMEM/return 0 lines are elided
 * from this view.
 */
2820 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2822 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2823 if (!vcpu->arch.gmap)
2825 vcpu->arch.gmap->private = vcpu->kvm;
/*
 * Remove a vcpu from the SCA: clear its bit in the mask of configured
 * cpus (mcn) and zero its SIE-block descriptor entry, for either SCA
 * format. No-op when SCA entries are not in use.
 */
2830 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2832 if (!kvm_s390_use_sca_entries())
2834 read_lock(&vcpu->kvm->arch.sca_lock);
2835 if (vcpu->kvm->arch.use_esca) {
2836 struct esca_block *sca = vcpu->kvm->arch.sca;
2838 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2839 sca->cpu[vcpu->vcpu_id].sda = 0;
2841 struct bsca_block *sca = vcpu->kvm->arch.sca;
2843 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2844 sca->cpu[vcpu->vcpu_id].sda = 0;
2846 read_unlock(&vcpu->kvm->arch.sca_lock);
/*
 * Register a vcpu in the SCA: store its SIE block address in the SCA
 * entry, point the SIE block's scaoh/scaol at the SCA, and set the
 * vcpu's bit in the mask of configured cpus.
 */
2849 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2851 if (!kvm_s390_use_sca_entries()) {
2852 struct bsca_block *sca = vcpu->kvm->arch.sca;
2854 /* we still need the basic sca for the ipte control */
2855 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2859 read_lock(&vcpu->kvm->arch.sca_lock);
2860 if (vcpu->kvm->arch.use_esca) {
2861 struct esca_block *sca = vcpu->kvm->arch.sca;
2863 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2864 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
/* Low bits of the ESCA origin carry flags, hence the ~0x3fU mask. */
2865 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2866 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2867 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2869 struct bsca_block *sca = vcpu->kvm->arch.sca;
2871 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2872 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2873 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2874 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2876 read_unlock(&vcpu->kvm->arch.sca_lock);
2879 /* Basic SCA to Extended SCA data copy routines */
/* Copy one basic-SCA cpu entry into its extended-SCA counterpart. */
2880 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2883 d->sigp_ctrl.c = s->sigp_ctrl.c;
2884 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
/* Copy ipte control and all basic-SCA cpu entries into an extended SCA. */
2887 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2891 d->ipte_control = s->ipte_control;
2893 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2894 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
/*
 * Replace the basic SCA with an extended SCA: allocate the new block,
 * copy the old contents, repoint every vcpu's SIE block at the new SCA
 * while all vcpus are blocked and the sca_lock is held for write, then
 * free the old basic SCA page.
 */
2897 static int sca_switch_to_extended(struct kvm *kvm)
2899 struct bsca_block *old_sca = kvm->arch.sca;
2900 struct esca_block *new_sca;
2901 struct kvm_vcpu *vcpu;
2902 unsigned int vcpu_idx;
/* Nothing to do if we already run on an extended SCA. */
2905 if (kvm->arch.use_esca)
2908 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2912 scaoh = (u32)((u64)(new_sca) >> 32);
2913 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2915 kvm_s390_vcpu_block_all(kvm);
2916 write_lock(&kvm->arch.sca_lock);
2918 sca_copy_b_to_e(new_sca, old_sca);
2920 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2921 vcpu->arch.sie_block->scaoh = scaoh;
2922 vcpu->arch.sie_block->scaol = scaol;
2923 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2925 kvm->arch.sca = new_sca;
2926 kvm->arch.use_esca = 1;
2928 write_unlock(&kvm->arch.sca_lock);
2929 kvm_s390_vcpu_unblock_all(kvm);
2931 free_page((unsigned long)old_sca);
2933 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2934 old_sca, kvm->arch.sca);
/*
 * Decide whether a vcpu with the given id fits into the SCA, upgrading
 * from the basic to the extended SCA on demand when the hardware
 * supports it (ESCA + 64-bit SCA origin).
 */
2938 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2942 if (!kvm_s390_use_sca_entries()) {
2943 if (id < KVM_MAX_VCPUS)
2947 if (id < KVM_S390_BSCA_CPU_SLOTS)
2949 if (!sclp.has_esca || !sclp.has_64bscao)
2952 mutex_lock(&kvm->lock);
2953 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2954 mutex_unlock(&kvm->lock);
2956 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Record the TOD timestamp at which cpu-timer accounting starts. */
2960 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2962 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2963 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2964 vcpu->arch.cputm_start = get_tod_clock_fast();
2965 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Charge the elapsed TOD time against the guest cpu timer and stop. */
2969 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2971 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2972 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2973 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2974 vcpu->arch.cputm_start = 0;
2975 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Mark accounting enabled and start the current interval. */
2979 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2981 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2982 vcpu->arch.cputm_enabled = true;
2983 __start_cpu_timer_accounting(vcpu);
2986 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/* Close the current interval and mark accounting disabled. */
2987 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2989 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2990 __stop_cpu_timer_accounting(vcpu);
2991 vcpu->arch.cputm_enabled = false;
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
2994 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2996 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2997 __enable_cpu_timer_accounting(vcpu);
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
3001 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3003 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3004 __disable_cpu_timer_accounting(vcpu);
3008 /* set the cpu timer - may only be called from the VCPU thread itself */
/*
 * Store a new guest cpu-timer value; restarts the accounting interval
 * when accounting is active so the new value is charged from "now".
 */
3009 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3011 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3012 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3013 if (vcpu->arch.cputm_enabled)
3014 vcpu->arch.cputm_start = get_tod_clock_fast();
3015 vcpu->arch.sie_block->cputm = cputm;
3016 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3020 /* update and get the cpu timer - can also be called from other VCPU threads */
/*
 * Read the guest cpu timer, subtracting the portion of the current
 * accounting interval that has already elapsed. Lockless seqcount read
 * retry loop; safe to call from other vcpu threads.
 */
3021 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3026 if (unlikely(!vcpu->arch.cputm_enabled))
3027 return vcpu->arch.sie_block->cputm;
3029 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3031 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3033 * If the writer would ever execute a read in the critical
3034 * section, e.g. in irq context, we have a deadlock.
3036 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3037 value = vcpu->arch.sie_block->cputm;
3038 /* if cputm_start is 0, accounting is being started/stopped */
3039 if (likely(vcpu->arch.cputm_start))
3040 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3041 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
/* Activate the vcpu's gmap and resume cpu-timer accounting on load. */
3046 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3049 gmap_enable(vcpu->arch.enabled_gmap);
3050 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3051 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3052 __start_cpu_timer_accounting(vcpu);
/* Suspend cpu-timer accounting and deactivate the gmap on sched-out. */
3056 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3059 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3060 __stop_cpu_timer_accounting(vcpu);
3061 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3062 vcpu->arch.enabled_gmap = gmap_get_enabled();
3063 gmap_disable(vcpu->arch.enabled_gmap);
/*
 * Late vcpu setup after creation: inherit the VM's TOD epoch, attach
 * the VM-wide gmap (non-ucontrol), enable operation-exception
 * interception when STHYI emulation or user instr0 handling is wanted.
 */
3067 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3069 mutex_lock(&vcpu->kvm->lock);
3071 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3072 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3074 mutex_unlock(&vcpu->kvm->lock);
3075 if (!kvm_is_ucontrol(vcpu->kvm)) {
3076 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3079 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3080 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3081 /* make vcpu_load load the right gmap on the first trigger */
3082 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
/*
 * True when PCKMO subfunction nr is enabled in both the guest cpu model
 * and the host. NOTE(review): the return statements are elided from
 * this view.
 */
3085 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3087 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3088 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3093 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3095 /* At least one ECC subfunction must be present */
3096 return kvm_has_pckmo_subfunc(kvm, 32) ||
3097 kvm_has_pckmo_subfunc(kvm, 33) ||
3098 kvm_has_pckmo_subfunc(kvm, 34) ||
3099 kvm_has_pckmo_subfunc(kvm, 40) ||
3100 kvm_has_pckmo_subfunc(kvm, 41);
/*
 * Program the per-vcpu crypto controls in the SIE block: CRYCB
 * designation, AP interpretation (ECA_APIE), and protected-key
 * AES/DEA/ECC wrapping (ECB3/ECD bits).
 */
3104 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3107 * If the AP instructions are not being interpreted and the MSAX3
3108 * facility is not configured for the guest, there is nothing to set up.
3110 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
/* Start from a clean state and re-apply the current VM settings. */
3113 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3114 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3115 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3116 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3118 if (vcpu->kvm->arch.crypto.apie)
3119 vcpu->arch.sie_block->eca |= ECA_APIE;
3121 /* Set up protected key support */
3122 if (vcpu->kvm->arch.crypto.aes_kw) {
3123 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3124 /* ecc is also wrapped with AES key */
3125 if (kvm_has_pckmo_ecc(vcpu->kvm))
3126 vcpu->arch.sie_block->ecd |= ECD_ECC;
3129 if (vcpu->kvm->arch.crypto.dea_kw)
3130 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
/* Free the collaborative-memory-management buffer (CBRL origin). */
3133 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3135 free_page(vcpu->arch.sie_block->cbrlo);
3136 vcpu->arch.sie_block->cbrlo = 0;
/*
 * Allocate the CMMA buffer page for the vcpu.
 * NOTE(review): the -ENOMEM/return 0 lines are elided from this view.
 */
3139 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3141 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3142 if (!vcpu->arch.sie_block->cbrlo)
/* Propagate the VM's cpu model (IBC, facility list) into the SIE block. */
3147 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3149 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3151 vcpu->arch.sie_block->ibc = model->ibc;
3152 if (test_kvm_facility(vcpu->kvm, 7))
3153 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/*
 * kvm_s390_vcpu_setup() - program the vcpu's SIE control block from the
 * VM configuration: cpuflags, model, execution-control (ecb*/eca/ecd)
 * bits gated on facilities, GISA, CMMA, the ckc timer and crypto, and
 * finally create the protected cpu when the VM is already protected.
 * NOTE(review): several conditional lines and returns are elided here.
 */
3156 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3161 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3165 if (test_kvm_facility(vcpu->kvm, 78))
3166 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3167 else if (test_kvm_facility(vcpu->kvm, 8))
3168 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3170 kvm_s390_vcpu_setup_model(vcpu);
3172 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3173 if (MACHINE_HAS_ESOP)
3174 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3175 if (test_kvm_facility(vcpu->kvm, 9))
3176 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3177 if (test_kvm_facility(vcpu->kvm, 73))
3178 vcpu->arch.sie_block->ecb |= ECB_TE;
3180 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3181 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3182 if (test_kvm_facility(vcpu->kvm, 130))
3183 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3184 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3186 vcpu->arch.sie_block->eca |= ECA_CEI;
3188 vcpu->arch.sie_block->eca |= ECA_IB;
3190 vcpu->arch.sie_block->eca |= ECA_SII;
3191 if (sclp.has_sigpif)
3192 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3193 if (test_kvm_facility(vcpu->kvm, 129)) {
3194 vcpu->arch.sie_block->eca |= ECA_VX;
3195 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3197 if (test_kvm_facility(vcpu->kvm, 139))
3198 vcpu->arch.sie_block->ecd |= ECD_MEF;
3199 if (test_kvm_facility(vcpu->kvm, 156))
3200 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3201 if (vcpu->arch.sie_block->gd) {
3202 vcpu->arch.sie_block->eca |= ECA_AIV;
3203 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3204 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3206 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3208 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
/* Keyless subset mode vs. interception of storage-key instructions. */
3211 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3213 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3215 if (vcpu->kvm->arch.use_cmma) {
3216 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3220 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3221 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3223 vcpu->arch.sie_block->hpid = HPID_KVM;
3225 kvm_s390_vcpu_crypto_setup(vcpu);
3227 mutex_lock(&vcpu->kvm->lock);
3228 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3229 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3231 kvm_s390_vcpu_unsetup_cmma(vcpu);
3233 mutex_unlock(&vcpu->kvm->lock);
/* Reject vcpu ids that cannot be represented in the (E)SCA. */
3238 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3240 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
/*
 * kvm_arch_vcpu_create() - allocate and initialize a vcpu's SIE page,
 * sync-region valid-register set (gated on facilities), optional
 * ucontrol gmap, and run the common vcpu setup.
 */
3245 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3247 struct sie_page *sie_page;
3250 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3251 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3255 vcpu->arch.sie_block = &sie_page->sie_block;
3256 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3258 /* the real guest size will always be smaller than msl */
3259 vcpu->arch.sie_block->mso = 0;
3260 vcpu->arch.sie_block->msl = sclp.hamax;
3262 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3263 spin_lock_init(&vcpu->arch.local_int.lock);
3264 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3265 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3266 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3267 seqcount_init(&vcpu->arch.cputm_seqcount);
3269 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3270 kvm_clear_async_pf_completion_queue(vcpu);
3271 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3278 kvm_s390_set_prefix(vcpu, 0);
3279 if (test_kvm_facility(vcpu->kvm, 64))
3280 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3281 if (test_kvm_facility(vcpu->kvm, 82))
3282 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3283 if (test_kvm_facility(vcpu->kvm, 133))
3284 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3285 if (test_kvm_facility(vcpu->kvm, 156))
3286 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3287 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3288 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3291 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3293 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3295 if (kvm_is_ucontrol(vcpu->kvm)) {
3296 rc = __kvm_ucontrol_vcpu_init(vcpu);
3298 goto out_free_sie_block;
3301 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3302 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3303 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3305 rc = kvm_s390_vcpu_setup(vcpu);
3307 goto out_ucontrol_uninit;
/* Error unwinding: drop the ucontrol gmap, then the SIE page. */
3310 out_ucontrol_uninit:
3311 if (kvm_is_ucontrol(vcpu->kvm))
3312 gmap_remove(vcpu->arch.gmap);
3314 free_page((unsigned long)(vcpu->arch.sie_block));
/* A vcpu is runnable when it has a deliverable interrupt pending. */
3318 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3320 return kvm_s390_vcpu_has_irq(vcpu, 0);
/* Guest is in "kernel mode" when the PSW problem-state bit is clear. */
3323 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3325 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
/* Prevent the vcpu from (re)entering SIE via the prog20 block bit. */
3328 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3330 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Allow the vcpu to enter SIE again. */
3334 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3336 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Flag a pending synchronous request in the SIE block. */
3339 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3341 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/* True while SIE entry is blocked or a synchronous request is pending. */
3345 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3347 return atomic_read(&vcpu->arch.sie_block->prog20) &
3348 (PROG_BLOCK_SIE | PROG_REQUEST);
/* Clear the pending-request flag once the request was processed. */
3351 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3353 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3357 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3358 * If the CPU is not running (e.g. waiting as idle) the function will
3359 * return immediately. */
/*
 * Kick the vcpu out of (v)SIE via a stop interrupt and busy-wait until
 * the SIE block reports it is no longer inside SIE.
 */
3360 void exit_sie(struct kvm_vcpu *vcpu)
3362 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3363 kvm_s390_vsie_kick(vcpu);
3364 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3368 /* Kick a guest cpu out of SIE to process a request synchronously */
3369 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3371 kvm_make_request(req, vcpu);
3372 kvm_s390_vcpu_request(vcpu);
/*
 * gmap invalidation notifier: when the invalidated range overlaps a
 * vcpu's (two-page) prefix area, request an MMU reload for that vcpu.
 * Shadow gmaps and ranges entirely above 2GB are ignored.
 */
3375 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3378 struct kvm *kvm = gmap->private;
3379 struct kvm_vcpu *vcpu;
3380 unsigned long prefix;
3383 if (gmap_is_shadow(gmap))
3385 if (start >= 1UL << 31)
3386 /* We are only interested in prefix pages */
3388 kvm_for_each_vcpu(i, vcpu, kvm) {
3389 /* match against both prefix pages */
3390 prefix = kvm_s390_get_prefix(vcpu);
3391 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3392 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3394 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/* Disable halt polling when host steal time exceeds the threshold. */
3399 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3401 /* do not poll with more than halt_poll_max_steal percent of steal time */
3402 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3403 halt_poll_max_steal) {
3404 vcpu->stat.halt_no_poll_steal++;
/* s390 uses exit_sie()/requests instead of IPI kicks. */
3410 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3412 /* kvm common code refers to this, but never calls it */
/*
 * KVM_GET_ONE_REG handler: copy the requested s390-specific register
 * (TOD programmable reg, epoch diff, cpu timer, clock comparator,
 * pfault state, program parameter, breaking-event address) to userspace.
 */
3417 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3418 struct kvm_one_reg *reg)
3423 case KVM_REG_S390_TODPR:
3424 r = put_user(vcpu->arch.sie_block->todpr,
3425 (u32 __user *)reg->addr);
3427 case KVM_REG_S390_EPOCHDIFF:
3428 r = put_user(vcpu->arch.sie_block->epoch,
3429 (u64 __user *)reg->addr);
3431 case KVM_REG_S390_CPU_TIMER:
3432 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3433 (u64 __user *)reg->addr);
3435 case KVM_REG_S390_CLOCK_COMP:
3436 r = put_user(vcpu->arch.sie_block->ckc,
3437 (u64 __user *)reg->addr);
3439 case KVM_REG_S390_PFTOKEN:
3440 r = put_user(vcpu->arch.pfault_token,
3441 (u64 __user *)reg->addr);
3443 case KVM_REG_S390_PFCOMPARE:
3444 r = put_user(vcpu->arch.pfault_compare,
3445 (u64 __user *)reg->addr);
3447 case KVM_REG_S390_PFSELECT:
3448 r = put_user(vcpu->arch.pfault_select,
3449 (u64 __user *)reg->addr);
3451 case KVM_REG_S390_PP:
3452 r = put_user(vcpu->arch.sie_block->pp,
3453 (u64 __user *)reg->addr);
3455 case KVM_REG_S390_GBEA:
3456 r = put_user(vcpu->arch.sie_block->gbea,
3457 (u64 __user *)reg->addr);
/*
 * KVM_SET_ONE_REG handler: mirror of the get path. Setting an invalid
 * pfault token also flushes the async-pf completion queue.
 */
3466 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3467 struct kvm_one_reg *reg)
3473 case KVM_REG_S390_TODPR:
3474 r = get_user(vcpu->arch.sie_block->todpr,
3475 (u32 __user *)reg->addr);
3477 case KVM_REG_S390_EPOCHDIFF:
3478 r = get_user(vcpu->arch.sie_block->epoch,
3479 (u64 __user *)reg->addr);
/* Cpu timer goes through the accounting-aware setter. */
3481 case KVM_REG_S390_CPU_TIMER:
3482 r = get_user(val, (u64 __user *)reg->addr);
3484 kvm_s390_set_cpu_timer(vcpu, val);
3486 case KVM_REG_S390_CLOCK_COMP:
3487 r = get_user(vcpu->arch.sie_block->ckc,
3488 (u64 __user *)reg->addr);
3490 case KVM_REG_S390_PFTOKEN:
3491 r = get_user(vcpu->arch.pfault_token,
3492 (u64 __user *)reg->addr);
3493 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3494 kvm_clear_async_pf_completion_queue(vcpu);
3496 case KVM_REG_S390_PFCOMPARE:
3497 r = get_user(vcpu->arch.pfault_compare,
3498 (u64 __user *)reg->addr);
3500 case KVM_REG_S390_PFSELECT:
3501 r = get_user(vcpu->arch.pfault_select,
3502 (u64 __user *)reg->addr);
3504 case KVM_REG_S390_PP:
3505 r = get_user(vcpu->arch.sie_block->pp,
3506 (u64 __user *)reg->addr);
3508 case KVM_REG_S390_GBEA:
3509 r = get_user(vcpu->arch.sie_block->gbea,
3510 (u64 __user *)reg->addr);
/*
 * Normal cpu reset: drop RI from the PSW, invalidate the pfault token,
 * clear the runtime-instrumentation control block and local interrupts;
 * stop the cpu unless userspace controls cpu state itself.
 */
3519 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3521 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3522 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3523 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3525 kvm_clear_async_pf_completion_queue(vcpu);
3526 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3527 kvm_s390_vcpu_stop(vcpu);
3528 kvm_s390_clear_local_irqs(vcpu);
/*
 * Initial cpu reset: normal reset plus architected initial values for
 * PSW, prefix, timers, control registers and the sync-regs mirror.
 * SIE-block-only registers are skipped for protected vcpus, where they
 * are overlayed/inaccessible.
 */
3531 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3533 /* Initial reset is a superset of the normal reset */
3534 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3537 * This equals initial cpu reset in pop, but we don't switch to ESA.
3538 * We do not only reset the internal data, but also ...
3540 vcpu->arch.sie_block->gpsw.mask = 0;
3541 vcpu->arch.sie_block->gpsw.addr = 0;
3542 kvm_s390_set_prefix(vcpu, 0);
3543 kvm_s390_set_cpu_timer(vcpu, 0);
3544 vcpu->arch.sie_block->ckc = 0;
3545 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3546 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3547 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3549 /* ... the data in sync regs */
3550 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3551 vcpu->run->s.regs.ckc = 0;
3552 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3553 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3554 vcpu->run->psw_addr = 0;
3555 vcpu->run->psw_mask = 0;
3556 vcpu->run->s.regs.todpr = 0;
3557 vcpu->run->s.regs.cputm = 0;
3558 vcpu->run->s.regs.ckc = 0;
3559 vcpu->run->s.regs.pp = 0;
3560 vcpu->run->s.regs.gbea = 1;
3561 vcpu->run->s.regs.fpc = 0;
3563 * Do not reset these registers in the protected case, as some of
3564 * them are overlayed and they are not accessible in this case
3567 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3568 vcpu->arch.sie_block->gbea = 1;
3569 vcpu->arch.sie_block->pp = 0;
3570 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3571 vcpu->arch.sie_block->todpr = 0;
/*
 * kvm_arch_vcpu_ioctl_clear_reset() - architected "clear reset".
 * Superset of the initial reset; additionally zeroes the general purpose,
 * vector, access and guarded-storage register images in the sync regs.
 * Fix: "&regs" had been mangled to the mojibake byte U+00AE ("(R)") in the
 * four memset() calls below; restored the address-of operator.
 */
3575 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3577 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3579 /* Clear reset is a superset of the initial reset */
3580 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3582 memset(&regs->gprs, 0, sizeof(regs->gprs));
3583 memset(&regs->vrs, 0, sizeof(regs->vrs));
3584 memset(&regs->acrs, 0, sizeof(regs->acrs));
3585 memset(&regs->gscb, 0, sizeof(regs->gscb));
3588 regs->etoken_extension = 0;
/*
 * kvm_arch_vcpu_ioctl_set_regs() - copy userspace-supplied general purpose
 * registers into the vcpu's sync regs area.
 * Fix: "&regs" had been mangled to the mojibake byte U+00AE in the memcpy();
 * restored the address-of operator.
 */
3591 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3594 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * kvm_arch_vcpu_ioctl_get_regs() - copy the vcpu's general purpose
 * registers out to the userspace-supplied buffer.
 * Fix: "&regs" had been mangled to the mojibake byte U+00AE in the memcpy();
 * restored the address-of operator.
 */
3599 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3602 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/*
 * kvm_arch_vcpu_ioctl_set_sregs() - install userspace-supplied access
 * registers (into the sync regs) and control registers (into the SIE block).
 */
3607 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3608 struct kvm_sregs *sregs)
3612 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3613 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * kvm_arch_vcpu_ioctl_get_sregs() - read back the access registers (from
 * the sync regs) and control registers (from the SIE block) for userspace.
 */
3619 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3620 struct kvm_sregs *sregs)
3624 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3625 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * kvm_arch_vcpu_ioctl_set_fpu() - install userspace-supplied FP state.
 * Validates the FP control word first; FPRs are converted into the vector
 * register image when the machine has VX, otherwise copied verbatim.
 */
3631 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
/* reject an invalid FP control word before touching any state */
3637 if (test_fp_ctl(fpu->fpc)) {
3641 vcpu->run->s.regs.fpc = fpu->fpc;
3643 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3644 (freg_t *) fpu->fprs);
3646 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/*
 * kvm_arch_vcpu_ioctl_get_fpu() - read back the guest FP state for
 * userspace, converting from the vector register image when VX is in use.
 */
3653 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3657 /* make sure we have the latest values */
3660 convert_vx_to_fp((freg_t *) fpu->fprs,
3661 (__vector128 *) vcpu->run->s.regs.vrs);
3663 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3664 fpu->fpc = vcpu->run->s.regs.fpc;
/*
 * kvm_arch_vcpu_ioctl_set_initial_psw() - set the guest PSW from userspace.
 * Only permitted while the vcpu is stopped.
 */
3670 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3674 if (!is_vcpu_stopped(vcpu))
3677 vcpu->run->psw_mask = psw.mask;
3678 vcpu->run->psw_addr = psw.addr;
/* Address translation ioctl is not implemented on s390. */
3683 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3684 struct kvm_translation *tr)
3686 return -EINVAL; /* not implemented yet */
/* Flags userspace may legitimately pass in kvm_guest_debug::control. */
3689 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3690 KVM_GUESTDBG_USE_HW_BP | \
3691 KVM_GUESTDBG_ENABLE)
/*
 * kvm_arch_vcpu_ioctl_set_guest_debug() - enable/disable guest debugging.
 * Validates the control flags, requires the SCLP GPERE facility, and when
 * enabled forces guest PER (CPUSTAT_P) and imports hardware breakpoints.
 * Any failure path tears the debug state back down.
 */
3693 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3694 struct kvm_guest_debug *dbg)
/* start from a clean slate before applying the new configuration */
3700 vcpu->guest_debug = 0;
3701 kvm_s390_clear_bp_data(vcpu);
3703 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3707 if (!sclp.has_gpere) {
3712 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3713 vcpu->guest_debug = dbg->control;
3714 /* enforce guest PER */
3715 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3717 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3718 rc = kvm_s390_import_bp_data(vcpu, dbg);
3720 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3721 vcpu->arch.guestdbg.last_bp = 0;
/* error path: undo partial debug setup */
3725 vcpu->guest_debug = 0;
3726 kvm_s390_clear_bp_data(vcpu);
3727 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
/*
 * kvm_arch_vcpu_ioctl_get_mpstate() - report the vcpu's multiprocessing
 * state; only STOPPED and OPERATING are ever reported.
 */
3735 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3736 struct kvm_mp_state *mp_state)
3742 /* CHECK_STOP and LOAD are not supported yet */
3743 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3744 KVM_MP_STATE_OPERATING;
/*
 * kvm_arch_vcpu_ioctl_set_mpstate() - set the vcpu's multiprocessing state.
 * Using this ioctl marks the VM as userspace-controlled cpu state. LOAD is
 * only valid for protected guests; CHECK_STOP is unsupported.
 */
3750 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3751 struct kvm_mp_state *mp_state)
3757 /* user space knows about this interface - let it control the state */
3758 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3760 switch (mp_state->mp_state) {
3761 case KVM_MP_STATE_STOPPED:
3762 rc = kvm_s390_vcpu_stop(vcpu);
3764 case KVM_MP_STATE_OPERATING:
3765 rc = kvm_s390_vcpu_start(vcpu);
3767 case KVM_MP_STATE_LOAD:
/* LOAD maps onto an ultravisor state change, protected guests only */
3768 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3772 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3774 case KVM_MP_STATE_CHECK_STOP:
3775 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
/* Is the interlock-and-broadcast-suppression facility active on this vcpu? */
3784 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3786 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
/*
 * kvm_s390_handle_requests() - process all pending KVM requests for this
 * vcpu before (re)entering SIE: prefix-page re-protection, TLB flush,
 * IBS enable/disable, operation-exception interception and CMM toggling
 * for migration start/stop.
 */
3789 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3792 kvm_s390_vcpu_request_handled(vcpu);
3793 if (!kvm_request_pending(vcpu))
3796 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3797 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3798 * This ensures that the ipte instruction for this request has
3799 * already finished. We might race against a second unmapper that
3800 * wants to set the blocking bit. Lets just retry the request loop.
3802 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3804 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3805 kvm_s390_get_prefix(vcpu),
3806 PAGE_SIZE * 2, PROT_WRITE);
/* re-queue the request on failure so the loop retries it */
3808 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3814 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
/* 0xffff invalidates the SIE-cached host cpu, forcing a TLB flush */
3815 vcpu->arch.sie_block->ihcpu = 0xffff;
3819 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3820 if (!ibs_enabled(vcpu)) {
3821 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3822 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3827 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3828 if (ibs_enabled(vcpu)) {
3829 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3830 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3835 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3836 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3840 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3842 * Disable CMM virtualization; we will emulate the ESSA
3843 * instruction manually, in order to provide additional
3844 * functionalities needed for live migration.
3846 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3850 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3852 * Re-enable CMM virtualization if CMMA is available and
3853 * CMM has been used.
3855 if ((vcpu->kvm->arch.use_cmma) &&
3856 (vcpu->kvm->mm->context.uses_cmm))
3857 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3861 /* nothing to do, just clear the request */
3862 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3863 /* we left the vsie handler, nothing to do, just clear the request */
3864 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
/*
 * kvm_s390_set_tod_clock() - set the guest TOD clock by computing epoch
 * deltas against the current host TOD, then propagating them to every
 * vcpu's SIE block while all vcpus are blocked.
 */
3869 void kvm_s390_set_tod_clock(struct kvm *kvm,
3870 const struct kvm_s390_vm_tod_clock *gtod)
3872 struct kvm_vcpu *vcpu;
3873 union tod_clock clk;
3876 mutex_lock(&kvm->lock);
3879 store_tod_clock_ext(&clk);
/* epoch = desired guest TOD minus current host TOD */
3881 kvm->arch.epoch = gtod->tod - clk.tod;
3883 if (test_kvm_facility(kvm, 139)) {
3884 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
/* borrow from the epoch index if the low part wrapped */
3885 if (kvm->arch.epoch > gtod->tod)
3886 kvm->arch.epdx -= 1;
3889 kvm_s390_vcpu_block_all(kvm);
3890 kvm_for_each_vcpu(i, vcpu, kvm) {
3891 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3892 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3895 kvm_s390_vcpu_unblock_all(kvm);
3897 mutex_unlock(&kvm->lock);
3901 * kvm_arch_fault_in_page - fault-in guest page if necessary
3902 * @vcpu: The corresponding virtual cpu
3903 * @gpa: Guest physical address
3904 * @writable: Whether the page should be writable or not
3906 * Make sure that a guest page has been faulted-in on the host.
3908 * Return: Zero on success, negative error code otherwise.
3910 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
/* delegate to the guest address space (gmap) fault handler */
3912 return gmap_fault(vcpu->arch.gmap, gpa,
3913 writable ? FAULT_FLAG_WRITE : 0);
/*
 * __kvm_inject_pfault_token() - inject the pfault handshake interrupt.
 * @start_token selects INIT (delivered to the vcpu) vs DONE (delivered to
 * the VM); the token identifies the outstanding fault to the guest.
 */
3916 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3917 unsigned long token)
3919 struct kvm_s390_interrupt inti;
3920 struct kvm_s390_irq irq;
3923 irq.u.ext.ext_params2 = token;
3924 irq.type = KVM_S390_INT_PFAULT_INIT;
3925 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3927 inti.type = KVM_S390_INT_PFAULT_DONE;
3928 inti.parm64 = token;
3929 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/* Async-pf callback: page not present yet - inject a PFAULT_INIT token. */
3933 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3934 struct kvm_async_pf *work)
3936 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3937 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/* Async-pf callback: page has arrived - inject a PFAULT_DONE token. */
3942 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3943 struct kvm_async_pf *work)
3945 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3946 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/* Async-pf callback: nothing to do here on s390 (injection is direct). */
3949 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3950 struct kvm_async_pf *work)
3952 /* s390 will always inject the page directly */
/* Always allow dequeueing so completed async-pf work gets cleaned up. */
3955 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3958 * s390 will always inject the page directly,
3959 * but we still want check_async_completion to cleanup
/*
 * kvm_arch_setup_async_pf() - decide whether the current guest fault may
 * be handled asynchronously via the pfault handshake. Bails out when the
 * token is invalid, the PSW does not match the pfault select/compare
 * criteria, external interrupts are disabled, an IRQ is already pending,
 * the service-signal submask is off, or pfault is disabled for the gmap.
 */
3964 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3967 struct kvm_arch_async_pf arch;
3969 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3971 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3972 vcpu->arch.pfault_compare)
3974 if (psw_extint_disabled(vcpu))
3976 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3978 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3980 if (!vcpu->arch.gmap->pfault_enabled)
/* translate the faulting guest address to a host virtual address */
3983 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3984 hva += current->thread.gmap_addr & ~PAGE_MASK;
3985 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3988 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/*
 * vcpu_pre_run() - housekeeping before entering SIE: async-pf completion,
 * saving gprs 14/15 into the SIE block, interrupt delivery (non-ucontrol
 * guests), pending request handling, guest-debug PER patching and GISA
 * kick-mask maintenance.
 */
3991 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3996 * On s390 notifications for arriving pages will be delivered directly
3997 * to the guest but the house keeping for completed pfaults is
3998 * handled outside the worker.
4000 kvm_check_async_pf_completion(vcpu);
/* SIE clobbers r14/r15; stash the guest values in the SIE block */
4002 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4003 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4008 if (!kvm_is_ucontrol(vcpu->kvm)) {
4009 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4014 rc = kvm_s390_handle_requests(vcpu);
4018 if (guestdbg_enabled(vcpu)) {
4019 kvm_s390_backup_guest_per_regs(vcpu);
4020 kvm_s390_patch_guest_per_regs(vcpu);
4023 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4025 vcpu->arch.sie_block->icptcode = 0;
4026 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4027 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4028 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/*
 * vcpu_post_run_fault_in_sie() - SIE faulted on a guest access; inject an
 * addressing exception. Since DAT exceptions are nullifying, the PSW still
 * points at the faulting instruction, so its length must be looked up to
 * forward the PSW before injection.
 */
4033 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4035 struct kvm_s390_pgm_info pgm_info = {
4036 .code = PGM_ADDRESSING,
4041 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4042 trace_kvm_s390_sie_fault(vcpu);
4045 * We want to inject an addressing exception, which is defined as a
4046 * suppressing or terminating exception. However, since we came here
4047 * by a DAT access exception, the PSW still points to the faulting
4048 * instruction since DAT exceptions are nullifying. So we've got
4049 * to look up the current opcode to get the length of the instruction
4050 * to be able to forward the PSW.
4052 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4053 ilen = insn_length(opcode);
4057 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4058 * Forward by arbitrary ilc, injection will take care of
4059 * nullification if necessary.
4061 pgm_info = vcpu->arch.pgm;
4064 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4065 kvm_s390_forward_psw(vcpu, ilen);
4066 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/*
 * vcpu_post_run() - dispatch on the SIE exit reason: restore PER regs and
 * gprs 14/15, reinject machine checks (exit_reason == -EINTR), run the
 * intercept handler, surface SIEIC exits to userspace, handle ucontrol
 * translation exits, and resolve guest page faults (async or synchronous).
 */
4069 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4071 struct mcck_volatile_info *mcck_info;
4072 struct sie_page *sie_page;
4074 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4075 vcpu->arch.sie_block->icptcode);
4076 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4078 if (guestdbg_enabled(vcpu))
4079 kvm_s390_restore_guest_per_regs(vcpu);
4081 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4082 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
/* -EINTR from sie64a signals a machine check to be reinjected */
4084 if (exit_reason == -EINTR) {
4085 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4086 sie_page = container_of(vcpu->arch.sie_block,
4087 struct sie_page, sie_block);
4088 mcck_info = &sie_page->mcck_info;
4089 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4093 if (vcpu->arch.sie_block->icptcode > 0) {
4094 int rc = kvm_handle_sie_intercept(vcpu);
4096 if (rc != -EOPNOTSUPP)
/* unhandled intercept: hand the raw SIEIC data to userspace */
4098 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4099 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4100 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4101 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4103 } else if (exit_reason != -EFAULT) {
4104 vcpu->stat.exit_null++;
4106 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4107 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4108 vcpu->run->s390_ucontrol.trans_exc_code =
4109 current->thread.gmap_addr;
4110 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4112 } else if (current->thread.gmap_pfault) {
4113 trace_kvm_s390_major_guest_pfault(vcpu);
4114 current->thread.gmap_pfault = 0;
/* prefer the async pfault handshake; fall back to a sync fault-in */
4115 if (kvm_arch_setup_async_pf(vcpu))
4117 vcpu->stat.pfault_sync++;
4118 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4120 return vcpu_post_run_fault_in_sie(vcpu);
/* PSW interruption-mask bits fenced off around protected-guest intercepts. */
4123 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
/*
 * __vcpu_run() - the main SIE entry loop: pre-run housekeeping, drop srcu,
 * enter the guest with IRQs off (copying gprs through the protected-guest
 * pv_grregs area when needed), then post-run processing; loops until a
 * signal, a guestdbg exit, or an error ends it.
 */
4124 static int __vcpu_run(struct kvm_vcpu *vcpu)
4126 int rc, exit_reason;
4127 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4130 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4131 * ning the guest), so that memslots (and other stuff) are protected
4133 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4136 rc = vcpu_pre_run(vcpu);
4140 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4142 * As PF_VCPU will be used in fault handler, between
4143 * guest_enter and guest_exit should be no uaccess.
4145 local_irq_disable();
4146 guest_enter_irqoff();
4147 __disable_cpu_timer_accounting(vcpu);
/* protected guests exchange gprs through the SIE page's pv area */
4149 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4150 memcpy(sie_page->pv_grregs,
4151 vcpu->run->s.regs.gprs,
4152 sizeof(sie_page->pv_grregs));
4154 if (test_cpu_flag(CIF_FPU))
4156 exit_reason = sie64a(vcpu->arch.sie_block,
4157 vcpu->run->s.regs.gprs);
4158 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4159 memcpy(vcpu->run->s.regs.gprs,
4160 sie_page->pv_grregs,
4161 sizeof(sie_page->pv_grregs));
4163 * We're not allowed to inject interrupts on intercepts
4164 * that leave the guest state in an "in-between" state
4165 * where the next SIE entry will do a continuation.
4166 * Fence interrupts in our "internal" PSW.
4168 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4169 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4170 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4173 local_irq_disable();
4174 __enable_cpu_timer_accounting(vcpu);
4175 guest_exit_irqoff();
4177 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4179 rc = vcpu_post_run(vcpu, exit_reason);
4180 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4182 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * sync_regs_fmt2() - sync format-2 (non-protected) registers from kvm_run
 * into the SIE block before guest entry: PSW, TOD programmable reg, PP,
 * GBEA, pfault parameters, diag318, plus lazy enablement of RI, GS and
 * BPBC when userspace marks the corresponding sync regs dirty.
 */
4186 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4188 struct kvm_run *kvm_run = vcpu->run;
4189 struct runtime_instr_cb *riccb;
4192 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4193 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4194 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4195 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4196 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4197 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4198 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4199 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4201 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4202 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4203 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4204 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4205 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4206 kvm_clear_async_pf_completion_queue(vcpu);
4208 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4209 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4210 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4213 * If userspace sets the riccb (e.g. after migration) to a valid state,
4214 * we should enable RI here instead of doing the lazy enablement.
4216 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4217 test_kvm_facility(vcpu->kvm, 64) &&
4219 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4220 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4221 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4224 * If userspace sets the gscb (e.g. after migration) to non-zero,
4225 * we should enable GS here instead of doing the lazy enablement.
4227 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4228 test_kvm_facility(vcpu->kvm, 133) &&
4230 !vcpu->arch.gs_enabled) {
4231 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4232 vcpu->arch.sie_block->ecb |= ECB_GS;
4233 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4234 vcpu->arch.gs_enabled = 1;
4236 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4237 test_kvm_facility(vcpu->kvm, 82)) {
4238 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4239 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4241 if (MACHINE_HAS_GS) {
/* CR2 bit 4: guarded-storage enablement for the host context */
4243 __ctl_set_bit(2, 4);
4244 if (current->thread.gs_cb) {
4245 vcpu->arch.host_gscb = current->thread.gs_cb;
4246 save_gs_cb(vcpu->arch.host_gscb);
4248 if (vcpu->arch.gs_enabled) {
4249 current->thread.gs_cb = (struct gs_cb *)
4250 &vcpu->run->s.regs.gscb;
4251 restore_gs_cb(current->thread.gs_cb);
4255 /* SIE will load etoken directly from SDNX and therefore kvm_run */
/*
 * sync_regs() - sync dirty registers from kvm_run into the vcpu state
 * before guest entry: prefix, control regs (with TLB flush), cpu timer,
 * clock comparator; swaps host/guest access and floating point register
 * sets; for protected guests only the condition code of the PSW mask is
 * accepted from userspace.
 */
4258 static void sync_regs(struct kvm_vcpu *vcpu)
4260 struct kvm_run *kvm_run = vcpu->run;
4262 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4263 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4264 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4265 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4266 /* some control register changes require a tlb flush */
4267 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4269 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4270 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4271 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4273 save_access_regs(vcpu->arch.host_acrs);
4274 restore_access_regs(vcpu->run->s.regs.acrs);
4275 /* save host (userspace) fprs/vrs */
4277 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4278 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4280 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4282 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4283 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4284 if (test_fp_ctl(current->thread.fpu.fpc))
4285 /* User space provided an invalid FPC, let's clear it */
4286 current->thread.fpu.fpc = 0;
4288 /* Sync fmt2 only data */
4289 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4290 sync_regs_fmt2(vcpu);
4293 * In several places we have to modify our internal view to
4294 * not do things that are disallowed by the ultravisor. For
4295 * example we must not inject interrupts after specific exits
4296 * (e.g. 112 prefix page not secure). We do this by turning
4297 * off the machine check, external and I/O interrupt bits
4298 * of our PSW copy. To avoid getting validity intercepts, we
4299 * do only accept the condition code from userspace.
4301 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4302 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4306 kvm_run->kvm_dirty_regs = 0;
/*
 * store_regs_fmt2() - store format-2 (non-protected) registers back into
 * kvm_run after guest exit, and restore the host guarded-storage control
 * block if the machine has GS.
 */
4309 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4311 struct kvm_run *kvm_run = vcpu->run;
4313 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4314 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4315 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4316 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4317 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4318 if (MACHINE_HAS_GS) {
4320 __ctl_set_bit(2, 4);
4321 if (vcpu->arch.gs_enabled)
4322 save_gs_cb(current->thread.gs_cb);
/* hand guarded storage back to the host task */
4323 current->thread.gs_cb = vcpu->arch.host_gscb;
4324 restore_gs_cb(vcpu->arch.host_gscb);
4325 if (!vcpu->arch.host_gscb)
4326 __ctl_clear_bit(2, 4);
4327 vcpu->arch.host_gscb = NULL;
4330 /* SIE will save etoken directly into SDNX and therefore kvm_run */
/*
 * store_regs() - store vcpu state back into kvm_run after guest exit:
 * PSW, prefix, control regs, timers and pfault parameters; swaps the
 * guest access/floating point registers back out for the host.
 */
4333 static void store_regs(struct kvm_vcpu *vcpu)
4335 struct kvm_run *kvm_run = vcpu->run;
4337 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4338 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4339 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4340 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4341 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4342 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4343 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4344 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4345 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4346 save_access_regs(vcpu->run->s.regs.acrs);
4347 restore_access_regs(vcpu->arch.host_acrs);
4348 /* Save guest register state */
4350 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4351 /* Restore will be done lazily at return */
4352 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4353 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4354 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4355 store_regs_fmt2(vcpu);
/*
 * kvm_arch_vcpu_ioctl_run() - the KVM_RUN ioctl entry point: validates
 * sync-reg field masks, handles pending guestdbg exits, starts the vcpu
 * (unless userspace controls cpu state), runs the SIE loop and translates
 * the result into a kvm_run exit reason for userspace.
 */
4358 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4360 struct kvm_run *kvm_run = vcpu->run;
4363 if (kvm_run->immediate_exit)
/* reject valid/dirty bits outside the architecture's sync-reg set */
4366 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4367 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4372 if (guestdbg_exit_pending(vcpu)) {
4373 kvm_s390_prepare_debug_exit(vcpu);
4378 kvm_sigset_activate(vcpu);
4381 * no need to check the return value of vcpu_start as it can only have
4382 * an error for protvirt, but protvirt means user cpu state
4384 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4385 kvm_s390_vcpu_start(vcpu);
4386 } else if (is_vcpu_stopped(vcpu)) {
4387 pr_err_ratelimited("can't run stopped vcpu %d\n",
4394 enable_cpu_timer_accounting(vcpu);
4397 rc = __vcpu_run(vcpu);
4399 if (signal_pending(current) && !rc) {
4400 kvm_run->exit_reason = KVM_EXIT_INTR;
4404 if (guestdbg_exit_pending(vcpu) && !rc) {
4405 kvm_s390_prepare_debug_exit(vcpu);
4409 if (rc == -EREMOTE) {
4410 /* userspace support is needed, kvm_run has been prepared */
4414 disable_cpu_timer_accounting(vcpu);
4417 kvm_sigset_deactivate(vcpu);
4419 vcpu->stat.exit_userspace++;
4426 * store status at address
4427 * we use have two special cases:
4428 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4429 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
/*
 * kvm_s390_store_status_unloaded() - write the architected status save
 * area (FPRs/VRs, GPRs, PSW, prefix, FPC, TOD PR, cpu timer, clock comp,
 * access and control registers) to guest memory at @gpa.
 * Returns 0 on success, -EFAULT if any guest write failed.
 */
4431 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4433 unsigned char archmode = 1;
4434 freg_t fprs[NUM_FPRS];
4439 px = kvm_s390_get_prefix(vcpu);
4440 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
/* byte 163 is the architected-mode flag in the save area */
4441 if (write_guest_abs(vcpu, 163, &archmode, 1))
4444 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4445 if (write_guest_real(vcpu, 163, &archmode, 1))
4449 gpa -= __LC_FPREGS_SAVE_AREA;
4451 /* manually convert vector registers if necessary */
4452 if (MACHINE_HAS_VX) {
4453 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4454 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4457 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4458 vcpu->run->s.regs.fprs, 128);
4460 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4461 vcpu->run->s.regs.gprs, 128);
4462 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4463 &vcpu->arch.sie_block->gpsw, 16);
4464 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4466 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4467 &vcpu->run->s.regs.fpc, 4);
4468 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4469 &vcpu->arch.sie_block->todpr, 4);
4470 cputm = kvm_s390_get_cpu_timer(vcpu);
4471 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4473 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4474 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4476 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4477 &vcpu->run->s.regs.acrs, 64);
4478 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4479 &vcpu->arch.sie_block->gcr, 128);
4480 return rc ? -EFAULT : 0;
/*
 * kvm_s390_vcpu_store_status() - refresh the lazily-switched FPC and
 * access registers into the vcpu copies, then store the status save area.
 */
4483 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4486 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4487 * switch in the run ioctl. Let's update our copies before we save
4488 * it into the save area
4491 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4492 save_access_regs(vcpu->run->s.regs.acrs);
4494 return kvm_s390_store_status_unloaded(vcpu, addr);
/* Cancel a pending ENABLE_IBS request and synchronously request DISABLE. */
4497 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4499 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4500 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Disable IBS on every vcpu of the VM. */
4503 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4506 struct kvm_vcpu *vcpu;
4508 kvm_for_each_vcpu(i, vcpu, kvm) {
4509 __disable_ibs_on_vcpu(vcpu);
/* Cancel a pending DISABLE_IBS request and synchronously request ENABLE. */
4513 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4517 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4518 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/*
 * kvm_s390_vcpu_start() - leave the STOPPED state. Serializes on the VM's
 * start/stop lock, notifies the ultravisor for protected guests, manages
 * the IBS speed-up (enabled only while a single vcpu runs), clears the
 * STOPPED cpuflag and flushes the TLB at startup.
 */
4521 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4523 int i, online_vcpus, r = 0, started_vcpus = 0;
4525 if (!is_vcpu_stopped(vcpu))
4528 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4529 /* Only one cpu at a time may enter/leave the STOPPED state. */
4530 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4531 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4533 /* Let's tell the UV that we want to change into the operating state */
4534 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4535 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4537 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4542 for (i = 0; i < online_vcpus; i++) {
4543 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4547 if (started_vcpus == 0) {
4548 /* we're the only active VCPU -> speed it up */
4549 __enable_ibs_on_vcpu(vcpu);
4550 } else if (started_vcpus == 1) {
4552 * As we are starting a second VCPU, we have to disable
4553 * the IBS facility on all VCPUs to remove potentially
4554 * outstanding ENABLE requests.
4556 __disable_ibs_on_all_vcpus(vcpu->kvm);
4559 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4561 * The real PSW might have changed due to a RESTART interpreted by the
4562 * ultravisor. We block all interrupts and let the next sie exit
4565 if (kvm_s390_pv_cpu_is_protected(vcpu))
4566 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4568 * Another VCPU might have used IBS while we were offline.
4569 * Let's play safe and flush the VCPU at startup.
4571 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4572 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * kvm_s390_vcpu_stop() - enter the STOPPED state. Serializes on the VM's
 * start/stop lock, notifies the ultravisor for protected guests, clears
 * pending SIGP STOP irqs, sets the STOPPED cpuflag and re-enables the IBS
 * speed-up on the last remaining running vcpu.
 */
4576 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4578 int i, online_vcpus, r = 0, started_vcpus = 0;
4579 struct kvm_vcpu *started_vcpu = NULL;
4581 if (is_vcpu_stopped(vcpu))
4584 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4585 /* Only one cpu at a time may enter/leave the STOPPED state. */
4586 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4587 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4589 /* Let's tell the UV that we want to change into the stopped state */
4590 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4591 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4593 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4598 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4599 kvm_s390_clear_stop_irq(vcpu);
4601 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4602 __disable_ibs_on_vcpu(vcpu);
/* count remaining running vcpus to decide on the IBS speed-up */
4604 for (i = 0; i < online_vcpus; i++) {
4605 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4607 started_vcpu = vcpu->kvm->vcpus[i];
4611 if (started_vcpus == 1) {
4613 * As we only have one VCPU left, we want to enable the
4614 * IBS facility for that VCPU to speed it up.
4616 __enable_ibs_on_vcpu(started_vcpu);
4619 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * kvm_vcpu_ioctl_enable_cap() - per-vcpu KVM_ENABLE_CAP handler; currently
 * only KVM_CAP_S390_CSS_SUPPORT (channel subsystem support) is visible here.
 */
4623 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4624 struct kvm_enable_cap *cap)
4632 case KVM_CAP_S390_CSS_SUPPORT:
4633 if (!vcpu->kvm->arch.css_support) {
4634 vcpu->kvm->arch.css_support = 1;
4635 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4636 trace_kvm_s390_enable_css(vcpu->kvm);
/*
 * kvm_s390_guest_sida_op() - read/write the secure instruction data area
 * of a protected guest. Validates flags, size and that the (offset+size)
 * range neither overflows nor exceeds the SIDA before copying to/from the
 * userspace buffer.
 */
4647 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4648 struct kvm_s390_mem_op *mop)
4650 void __user *uaddr = (void __user *)mop->buf;
4653 if (mop->flags || !mop->size)
/* overflow check: size + offset must not wrap */
4655 if (mop->size + mop->sida_offset < mop->size)
4657 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4661 case KVM_S390_MEMOP_SIDA_READ:
4662 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4663 mop->sida_offset), mop->size))
4667 case KVM_S390_MEMOP_SIDA_WRITE:
4668 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4669 mop->sida_offset), uaddr, mop->size))
/*
 * kvm_s390_guest_mem_op() - logical read/write of guest memory on behalf
 * of userspace (KVM_S390_MEM_OP). Supports a check-only mode that merely
 * validates the guest address range, and optional injection of the
 * resulting program exception into the guest.
 */
4675 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4676 struct kvm_s390_mem_op *mop)
4678 void __user *uaddr = (void __user *)mop->buf;
4679 void *tmpbuf = NULL;
4681 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4682 | KVM_S390_MEMOP_F_CHECK_ONLY;
4684 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4687 if (mop->size > MEM_OP_MAX_SIZE)
4690 if (kvm_s390_pv_cpu_is_protected(vcpu))
/* a bounce buffer is only needed when data is actually transferred */
4693 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4694 tmpbuf = vmalloc(mop->size);
4700 case KVM_S390_MEMOP_LOGICAL_READ:
4701 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4702 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4703 mop->size, GACC_FETCH);
4706 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4708 if (copy_to_user(uaddr, tmpbuf, mop->size))
4712 case KVM_S390_MEMOP_LOGICAL_WRITE:
4713 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4714 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4715 mop->size, GACC_STORE);
4718 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4722 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
/* r > 0 means a guest access exception occurred; optionally inject it */
4726 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4727 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/*
 * kvm_s390_guest_memsida_op() - dispatch a KVM_S390_MEM_OP to either the
 * logical memory or the SIDA handler, under the kvm srcu read lock.
 */
4733 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4734 struct kvm_s390_mem_op *mop)
4738 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4741 case KVM_S390_MEMOP_LOGICAL_READ:
4742 case KVM_S390_MEMOP_LOGICAL_WRITE:
4743 r = kvm_s390_guest_mem_op(vcpu, mop);
4745 case KVM_S390_MEMOP_SIDA_READ:
4746 case KVM_S390_MEMOP_SIDA_WRITE:
4747 /* we are locked against sida going away by the vcpu->mutex */
4748 r = kvm_s390_guest_sida_op(vcpu, mop);
4754 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
/*
 * kvm_arch_vcpu_async_ioctl() - vcpu ioctls that may run without taking
 * the vcpu mutex: interrupt injection via KVM_S390_IRQ and the legacy
 * KVM_S390_INTERRUPT (converted to a kvm_s390_irq first).
 */
4758 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4759 unsigned int ioctl, unsigned long arg)
4761 struct kvm_vcpu *vcpu = filp->private_data;
4762 void __user *argp = (void __user *)arg;
4765 case KVM_S390_IRQ: {
4766 struct kvm_s390_irq s390irq;
4768 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4770 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4772 case KVM_S390_INTERRUPT: {
4773 struct kvm_s390_interrupt s390int;
4774 struct kvm_s390_irq s390irq = {};
4776 if (copy_from_user(&s390int, argp, sizeof(s390int)))
/* convert the legacy interrupt layout into the extended irq format */
4778 if (s390int_to_s390irq(&s390int, &s390irq))
4780 return kvm_s390_inject_vcpu(vcpu, &s390irq);
/* everything else is handled by the regular (mutex-holding) ioctl path */
4783 return -ENOIOCTLCMD;
4786 long kvm_arch_vcpu_ioctl(struct file *filp,
4787 unsigned int ioctl, unsigned long arg)
4789 struct kvm_vcpu *vcpu = filp->private_data;
4790 void __user *argp = (void __user *)arg;
4798 case KVM_S390_STORE_STATUS:
4799 idx = srcu_read_lock(&vcpu->kvm->srcu);
4800 r = kvm_s390_store_status_unloaded(vcpu, arg);
4801 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4803 case KVM_S390_SET_INITIAL_PSW: {
4807 if (copy_from_user(&psw, argp, sizeof(psw)))
4809 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4812 case KVM_S390_CLEAR_RESET:
4814 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4815 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4816 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4817 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4818 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4822 case KVM_S390_INITIAL_RESET:
4824 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4825 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4826 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4827 UVC_CMD_CPU_RESET_INITIAL,
4829 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4833 case KVM_S390_NORMAL_RESET:
4835 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4836 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838 UVC_CMD_CPU_RESET, &rc, &rrc);
4839 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4843 case KVM_SET_ONE_REG:
4844 case KVM_GET_ONE_REG: {
4845 struct kvm_one_reg reg;
4847 if (kvm_s390_pv_cpu_is_protected(vcpu))
4850 if (copy_from_user(®, argp, sizeof(reg)))
4852 if (ioctl == KVM_SET_ONE_REG)
4853 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, ®);
4855 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, ®);
4858 #ifdef CONFIG_KVM_S390_UCONTROL
4859 case KVM_S390_UCAS_MAP: {
4860 struct kvm_s390_ucas_mapping ucasmap;
4862 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4867 if (!kvm_is_ucontrol(vcpu->kvm)) {
4872 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4873 ucasmap.vcpu_addr, ucasmap.length);
4876 case KVM_S390_UCAS_UNMAP: {
4877 struct kvm_s390_ucas_mapping ucasmap;
4879 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4884 if (!kvm_is_ucontrol(vcpu->kvm)) {
4889 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4894 case KVM_S390_VCPU_FAULT: {
4895 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4898 case KVM_ENABLE_CAP:
4900 struct kvm_enable_cap cap;
4902 if (copy_from_user(&cap, argp, sizeof(cap)))
4904 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4907 case KVM_S390_MEM_OP: {
4908 struct kvm_s390_mem_op mem_op;
4910 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4911 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4916 case KVM_S390_SET_IRQ_STATE: {
4917 struct kvm_s390_irq_state irq_state;
4920 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4922 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4923 irq_state.len == 0 ||
4924 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4928 /* do not use irq_state.flags, it will break old QEMUs */
4929 r = kvm_s390_set_irq_state(vcpu,
4930 (void __user *) irq_state.buf,
4934 case KVM_S390_GET_IRQ_STATE: {
4935 struct kvm_s390_irq_state irq_state;
4938 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4940 if (irq_state.len == 0) {
4944 /* do not use irq_state.flags, it will break old QEMUs */
4945 r = kvm_s390_get_irq_state(vcpu,
4946 (__u8 __user *) irq_state.buf,
4958 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4960 #ifdef CONFIG_KVM_S390_UCONTROL
4961 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4962 && (kvm_is_ucontrol(vcpu->kvm))) {
4963 vmf->page = virt_to_page(vcpu->arch.sie_block);
4964 get_page(vmf->page);
4968 return VM_FAULT_SIGBUS;
4971 /* Section: memory related */
4972 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4973 struct kvm_memory_slot *memslot,
4974 const struct kvm_userspace_memory_region *mem,
4975 enum kvm_mr_change change)
4977 /* A few sanity checks. We can have memory slots which have to be
4978 located/ended at a segment boundary (1MB). The memory in userland is
4979 ok to be fragmented into various different vmas. It is okay to mmap()
4980 and munmap() stuff in this slot after doing this call at any time */
4982 if (mem->userspace_addr & 0xffffful)
4985 if (mem->memory_size & 0xffffful)
4988 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4991 /* When we are protected, we should not change the memory slots */
4992 if (kvm_s390_pv_get_handle(kvm))
4997 void kvm_arch_commit_memory_region(struct kvm *kvm,
4998 const struct kvm_userspace_memory_region *mem,
4999 struct kvm_memory_slot *old,
5000 const struct kvm_memory_slot *new,
5001 enum kvm_mr_change change)
5007 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5008 old->npages * PAGE_SIZE);
5011 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5012 old->npages * PAGE_SIZE);
5017 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5018 mem->guest_phys_addr, mem->memory_size);
5020 case KVM_MR_FLAGS_ONLY:
5023 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5026 pr_warn("failed to commit memory region\n");
5030 static inline unsigned long nonhyp_mask(int i)
5032 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5034 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5037 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5039 vcpu->valid_wakeup = false;
5042 static int __init kvm_s390_init(void)
5046 if (!sclp.has_sief2) {
5047 pr_info("SIE is not available\n");
5051 if (nested && hpage) {
5052 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5056 for (i = 0; i < 16; i++)
5057 kvm_s390_fac_base[i] |=
5058 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5060 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5063 static void __exit kvm_s390_exit(void)
5068 module_init(kvm_s390_init);
5069 module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
5076 #include <linux/miscdevice.h>
5077 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5078 MODULE_ALIAS("devname:kvm");