1 // SPDX-License-Identifier: GPL-2.0
3 * hosting IBM Z kernel virtual machines (s390x)
5 * Copyright IBM Corp. 2008, 2020
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
48 #include <asm/fpu/api.h>
52 #define CREATE_TRACE_POINTS
54 #include "trace-s390.h"
56 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
/* Size in bytes of (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries. */
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 (KVM_MAX_VCPUS + LOCAL_IRQS))
/*
 * Descriptors for the per-VM statistics: the generic KVM VM stats followed
 * by the s390-specific inject_* counters. The static_assert that follows
 * requires exactly one descriptor per u64 in struct kvm_vm_stat.
 */
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62 KVM_GENERIC_VM_STATS(),
63 STATS_DESC_COUNTER(VM, inject_io),
64 STATS_DESC_COUNTER(VM, inject_float_mchk),
65 STATS_DESC_COUNTER(VM, inject_pfault_done),
66 STATS_DESC_COUNTER(VM, inject_service_signal),
67 STATS_DESC_COUNTER(VM, inject_virtio)
69 static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
70 sizeof(struct kvm_vm_stat) / sizeof(u64));
/*
 * Layout header for the VM statistics: the id string starts right after
 * the header, the descriptors start KVM_STATS_NAME_SIZE bytes after that,
 * and the data starts after the descriptor array (kvm_vm_stats_desc).
 */
72 const struct kvm_stats_header kvm_vm_stats_header = {
73 .name_size = KVM_STATS_NAME_SIZE,
74 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
75 .id_offset = sizeof(struct kvm_stats_header),
76 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
77 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
78 sizeof(kvm_vm_stats_desc),
/*
 * Descriptors for the per-vCPU statistics: the generic KVM vCPU stats
 * followed by s390-specific counters (exit_*, deliver_*, inject_*,
 * instruction_*, diag_9c_* and pfault_sync). The static_assert that
 * follows requires exactly one descriptor per u64 in struct kvm_vcpu_stat.
 */
81 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
82 KVM_GENERIC_VCPU_STATS(),
83 STATS_DESC_COUNTER(VCPU, exit_userspace),
84 STATS_DESC_COUNTER(VCPU, exit_null),
85 STATS_DESC_COUNTER(VCPU, exit_external_request),
86 STATS_DESC_COUNTER(VCPU, exit_io_request),
87 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
88 STATS_DESC_COUNTER(VCPU, exit_stop_request),
89 STATS_DESC_COUNTER(VCPU, exit_validity),
90 STATS_DESC_COUNTER(VCPU, exit_instruction),
91 STATS_DESC_COUNTER(VCPU, exit_pei),
92 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
93 STATS_DESC_COUNTER(VCPU, instruction_lctl),
94 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
95 STATS_DESC_COUNTER(VCPU, instruction_stctl),
96 STATS_DESC_COUNTER(VCPU, instruction_stctg),
97 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
98 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
99 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
100 STATS_DESC_COUNTER(VCPU, deliver_ckc),
101 STATS_DESC_COUNTER(VCPU, deliver_cputm),
102 STATS_DESC_COUNTER(VCPU, deliver_external_call),
103 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
104 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
105 STATS_DESC_COUNTER(VCPU, deliver_virtio),
106 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
107 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
108 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
109 STATS_DESC_COUNTER(VCPU, deliver_program),
110 STATS_DESC_COUNTER(VCPU, deliver_io),
111 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
112 STATS_DESC_COUNTER(VCPU, exit_wait_state),
113 STATS_DESC_COUNTER(VCPU, inject_ckc),
114 STATS_DESC_COUNTER(VCPU, inject_cputm),
115 STATS_DESC_COUNTER(VCPU, inject_external_call),
116 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
117 STATS_DESC_COUNTER(VCPU, inject_mchk),
118 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
119 STATS_DESC_COUNTER(VCPU, inject_program),
120 STATS_DESC_COUNTER(VCPU, inject_restart),
121 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
122 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
123 STATS_DESC_COUNTER(VCPU, instruction_epsw),
124 STATS_DESC_COUNTER(VCPU, instruction_gs),
125 STATS_DESC_COUNTER(VCPU, instruction_io_other),
126 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
127 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
128 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
129 STATS_DESC_COUNTER(VCPU, instruction_ptff),
130 STATS_DESC_COUNTER(VCPU, instruction_sck),
131 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
132 STATS_DESC_COUNTER(VCPU, instruction_stidp),
133 STATS_DESC_COUNTER(VCPU, instruction_spx),
134 STATS_DESC_COUNTER(VCPU, instruction_stpx),
135 STATS_DESC_COUNTER(VCPU, instruction_stap),
136 STATS_DESC_COUNTER(VCPU, instruction_iske),
137 STATS_DESC_COUNTER(VCPU, instruction_ri),
138 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
139 STATS_DESC_COUNTER(VCPU, instruction_sske),
140 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
141 STATS_DESC_COUNTER(VCPU, instruction_stsi),
142 STATS_DESC_COUNTER(VCPU, instruction_stfl),
143 STATS_DESC_COUNTER(VCPU, instruction_tb),
144 STATS_DESC_COUNTER(VCPU, instruction_tpi),
145 STATS_DESC_COUNTER(VCPU, instruction_tprot),
146 STATS_DESC_COUNTER(VCPU, instruction_tsch),
147 STATS_DESC_COUNTER(VCPU, instruction_sie),
148 STATS_DESC_COUNTER(VCPU, instruction_essa),
149 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
150 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
151 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
152 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
153 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
154 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
155 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
156 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
157 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
158 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
164 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
165 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
167 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
168 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
169 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
170 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
173 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
174 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
175 STATS_DESC_COUNTER(VCPU, pfault_sync)
177 static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
178 sizeof(struct kvm_vcpu_stat) / sizeof(u64));
/*
 * Layout header for the vCPU statistics: the id string starts right after
 * the header, the descriptors start KVM_STATS_NAME_SIZE bytes after that,
 * and the data starts after the descriptor array (kvm_vcpu_stats_desc).
 */
180 const struct kvm_stats_header kvm_vcpu_stats_header = {
181 .name_size = KVM_STATS_NAME_SIZE,
182 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
183 .id_offset = sizeof(struct kvm_stats_header),
184 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
185 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
186 sizeof(kvm_vcpu_stats_desc),
189 /* allow nested virtualization in KVM (if enabled by user space) */
/* Read-only for everyone; octal 0444 is the same mode as S_IRUGO. */
191 module_param(nested, int, 0444);
192 MODULE_PARM_DESC(nested, "Nested virtualization support");
194 /* allow 1m huge page guest backing, if !nested */
196 module_param(hpage, int, 0444);
197 MODULE_PARM_DESC(hpage, "1m huge page backing support");
199 /* maximum percentage of steal time for polling. >100 is treated like 100 */
200 static u8 halt_poll_max_steal = 10;
201 module_param(halt_poll_max_steal, byte, 0644);
202 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
204 /* if set to true, the GISA will be initialized and used if available */
205 static bool use_gisa = true;
206 module_param(use_gisa, bool, 0644);
207 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
209 /* maximum diag9c forwarding per second */
210 unsigned int diag9c_forwarding_hz;
211 module_param(diag9c_forwarding_hz, uint, 0644);
212 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
215 * For now we handle at most 16 double words as this is what the s390 base
216 * kernel handles and stores in the prefix page. If we ever need to go beyond
217 * this, this requires changes to code, but the external uapi can stay.
219 #define SIZE_INTERNAL 16
222 * Base feature mask that defines default mask for facilities. Consists of the
223 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
225 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
227 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
228 * and defines the facilities that can be enabled via a cpu model.
230 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
/*
 * Return the number of facility words handled internally (SIZE_INTERNAL).
 * The build-time checks ensure that this never exceeds the uapi facility
 * mask/list sizes and that SIZE_INTERNAL unsigned longs fit into
 * stfle_fac_list.
 */
232 static unsigned long kvm_s390_fac_size(void)
234 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
235 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
236 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
237 sizeof(stfle_fac_list));
239 return SIZE_INTERNAL;
242 /* available cpu features supported by kvm */
243 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
244 /* available subfunctions indicated via query / "test bit" */
245 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
247 static struct gmap_notifier gmap_notifier;
248 static struct gmap_notifier vsie_gmap_notifier;
249 debug_info_t *kvm_s390_dbf;
250 debug_info_t *kvm_s390_dbf_uv;
252 /* Section: not file related */
253 int kvm_arch_hardware_enable(void)
255 /* every s390 is virtualization enabled ;-) */
259 int kvm_arch_check_processor_compat(void *opaque)
264 /* forward declarations */
265 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
267 static int sca_switch_to_extended(struct kvm *kvm);
/*
 * Adjust the epoch fields of one SIE control block (@scb) after the host
 * TOD clock jumped by @delta. When ECD_MEF is set in scb->ecd, the epoch
 * index (scb->epdx) is adjusted as well, and scb->epoch is compared with
 * delta afterwards.
 * NOTE(review): the comparison looks like carry detection for the epoch
 * addition - confirm against the full function body.
 */
269 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
274 * The TOD jumps by delta, we have to compensate this by adding
275 * -delta to the epoch.
279 /* sign-extension - we're adding to signed values below */
284 if (scb->ecd & ECD_MEF) {
285 scb->epdx += delta_idx;
286 if (scb->epoch < delta)
292 * This callback is executed during stop_machine(). All CPUs are therefore
293 * temporarily stopped. In order not to change guest behavior, we have to
294 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
295 * so a CPU won't be stopped while calculating with the epoch.
/*
 * Notifier callback for TOD clock changes. @v points to the delta
 * (unsigned long long). For every VM on vm_list and every vCPU of that VM:
 * the vCPU's SIE block is adjusted via kvm_clock_sync_scb(), epoch/epdx
 * are copied from a vCPU's SIE block into kvm->arch, cputm_start is
 * advanced by the delta when cputm_enabled is set, and a vsie_block, if
 * present, is adjusted too.
 */
297 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
301 struct kvm_vcpu *vcpu;
303 unsigned long long *delta = v;
305 list_for_each_entry(kvm, &vm_list, vm_list) {
306 kvm_for_each_vcpu(i, vcpu, kvm) {
307 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
309 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
310 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
312 if (vcpu->arch.cputm_enabled)
313 vcpu->arch.cputm_start += *delta;
314 if (vcpu->arch.vsie_block)
315 kvm_clock_sync_scb(vcpu->arch.vsie_block,
/* Notifier block whose callback is kvm_clock_sync(). */
322 static struct notifier_block kvm_clock_notifier = {
323 .notifier_call = kvm_clock_sync,
/*
 * Install the gmap PTE notifiers for KVM (kvm_gmap_notifier) and for VSIE
 * (kvm_s390_vsie_gmap_notifier), and add kvm_clock_notifier to the
 * s390_epoch_delta_notifier chain.
 */
326 int kvm_arch_hardware_setup(void *opaque)
328 gmap_notifier.notifier_call = kvm_gmap_notifier;
329 gmap_register_pte_notifier(&gmap_notifier);
330 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
331 gmap_register_pte_notifier(&vsie_gmap_notifier);
332 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
333 &kvm_clock_notifier);
/*
 * Undo kvm_arch_hardware_setup(): unregister both gmap PTE notifiers and
 * remove kvm_clock_notifier from the s390_epoch_delta_notifier chain.
 */
337 void kvm_arch_hardware_unsetup(void)
339 gmap_unregister_pte_notifier(&gmap_notifier);
340 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
341 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
342 &kvm_clock_notifier);
/* Mark CPU feature @nr as available in kvm_s390_available_cpu_feat (via set_bit_inv()). */
345 static void allow_cpu_feat(unsigned long nr)
347 set_bit_inv(nr, kvm_s390_available_cpu_feat);
/*
 * Issue the "test bit" form of an instruction for function number @nr.
 * The function code is @nr with bit 0x100 set and is loaded into
 * register 0 by the inline assembly.
 * NOTE(review): the instruction itself and the return value are not
 * visible here; the caller stores its result in the "plo" subfunction
 * mask - confirm.
 */
350 static inline int plo_test_bit(unsigned char nr)
352 unsigned long function = (unsigned long)nr | 0x100;
356 " lgr 0,%[function]\n"
357 /* Parameter registers are ignored for "test bit" */
362 : [function] "d" (function)
/*
 * Execute the instruction with opcode @opcode, emitted as an RRF-format
 * .insn, with the address of @query passed as an input operand. The asm
 * declares the condition code, memory and registers 0 and 1 as clobbered.
 * NOTE(review): presumably @query receives the instruction's query
 * result - confirm against the full asm body.
 */
367 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
372 /* Parameter registers are ignored */
373 " .insn rrf,%[opc] << 16,2,4,6,0\n"
375 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
376 : "cc", "memory", "0", "1");
/* Opcodes handed to __insn32_query() for the SORTL and DFLTCC subfunction queries. */
379 #define INSN_SORTL 0xb938
380 #define INSN_DFLTCC 0xb939
/*
 * Probe the host and record what KVM can offer to guests:
 *  - fill kvm_s390_available_subfunc (PLO bits, PTFF, the CPACF query
 *    results, SORTL and DFLTCC), each guarded by the matching facility
 *    test;
 *  - set bits in kvm_s390_available_cpu_feat through allow_cpu_feat().
 * The features after the sief2/ESOP/64bscao/facility-3/nested check are
 * the ones that depend on that check.
 */
382 static void kvm_s390_cpu_feat_init(void)
386 for (i = 0; i < 256; ++i) {
388 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
391 if (test_facility(28)) /* TOD-clock steering */
392 ptff(kvm_s390_available_subfunc.ptff,
393 sizeof(kvm_s390_available_subfunc.ptff),
396 if (test_facility(17)) { /* MSA */
397 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
398 kvm_s390_available_subfunc.kmac);
399 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
400 kvm_s390_available_subfunc.kmc);
401 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
402 kvm_s390_available_subfunc.km);
403 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
404 kvm_s390_available_subfunc.kimd);
405 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
406 kvm_s390_available_subfunc.klmd);
408 if (test_facility(76)) /* MSA3 */
409 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
410 kvm_s390_available_subfunc.pckmo);
411 if (test_facility(77)) { /* MSA4 */
412 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
413 kvm_s390_available_subfunc.kmctr);
414 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
415 kvm_s390_available_subfunc.kmf);
416 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
417 kvm_s390_available_subfunc.kmo);
418 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
419 kvm_s390_available_subfunc.pcc);
421 if (test_facility(57)) /* MSA5 */
422 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
423 kvm_s390_available_subfunc.ppno);
425 if (test_facility(146)) /* MSA8 */
426 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
427 kvm_s390_available_subfunc.kma);
429 if (test_facility(155)) /* MSA9 */
430 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
431 kvm_s390_available_subfunc.kdsa);
433 if (test_facility(150)) /* SORTL */
434 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
436 if (test_facility(151)) /* DFLTCC */
437 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
439 if (MACHINE_HAS_ESOP)
440 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
442 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
443 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
445 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
446 !test_facility(3) || !nested)
448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
449 if (sclp.has_64bscao)
450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
462 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
464 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
466 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
467 * all skey handling functions read/set the skey from the PGSTE
468 * instead of the real storage key.
470 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
471 * pages being detected as preserved although they are resident.
473 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
474 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
476 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
477 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
478 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
480 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
481 * cannot easily shadow the SCA because of the ipte lock.
/*
 * Architecture init: register the "kvm-trace" and "kvm-uv" debug areas and
 * attach the sprintf view to both, initialise the CPU feature tables,
 * register the FLIC device ops (KVM_DEV_TYPE_FLIC) and initialise the GIB
 * with GAL_ISC. A failing FLIC registration is reported with pr_err().
 */
485 int kvm_arch_init(void *opaque)
489 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
493 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
494 if (!kvm_s390_dbf_uv)
497 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
498 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
501 kvm_s390_cpu_feat_init();
503 /* Register floating interrupt controller interface. */
504 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
506 pr_err("A FLIC registration call failed with rc=%d\n", rc);
510 rc = kvm_s390_gib_init(GAL_ISC);
/* Architecture exit: destroy the GIB and unregister both debug areas. */
521 void kvm_arch_exit(void)
523 kvm_s390_gib_destroy();
524 debug_unregister(kvm_s390_dbf);
525 debug_unregister(kvm_s390_dbf_uv);
528 /* Section: device related */
/*
 * Arch-specific device ioctl handler. KVM_S390_ENABLE_SIE is forwarded to
 * s390_enable_sie() and its result is returned.
 */
529 long kvm_arch_dev_ioctl(struct file *filp,
530 unsigned int ioctl, unsigned long arg)
532 if (ioctl == KVM_S390_ENABLE_SIE)
533 return s390_enable_sie();
/*
 * Report the value of capability @ext for @kvm. A large group of
 * capabilities shares one result; the others are derived from module
 * parameters (hpage), the VM type (ucontrol), the SCA configuration, or
 * host facility/machine checks (ESOP, facilities 64, 133, 82, protected
 * virtualization host).
 */
537 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
542 case KVM_CAP_S390_PSW:
543 case KVM_CAP_S390_GMAP:
544 case KVM_CAP_SYNC_MMU:
545 #ifdef CONFIG_KVM_S390_UCONTROL
546 case KVM_CAP_S390_UCONTROL:
548 case KVM_CAP_ASYNC_PF:
549 case KVM_CAP_SYNC_REGS:
550 case KVM_CAP_ONE_REG:
551 case KVM_CAP_ENABLE_CAP:
552 case KVM_CAP_S390_CSS_SUPPORT:
553 case KVM_CAP_IOEVENTFD:
554 case KVM_CAP_DEVICE_CTRL:
555 case KVM_CAP_S390_IRQCHIP:
556 case KVM_CAP_VM_ATTRIBUTES:
557 case KVM_CAP_MP_STATE:
558 case KVM_CAP_IMMEDIATE_EXIT:
559 case KVM_CAP_S390_INJECT_IRQ:
560 case KVM_CAP_S390_USER_SIGP:
561 case KVM_CAP_S390_USER_STSI:
562 case KVM_CAP_S390_SKEYS:
563 case KVM_CAP_S390_IRQ_STATE:
564 case KVM_CAP_S390_USER_INSTR0:
565 case KVM_CAP_S390_CMMA_MIGRATION:
566 case KVM_CAP_S390_AIS:
567 case KVM_CAP_S390_AIS_MIGRATION:
568 case KVM_CAP_S390_VCPU_RESETS:
569 case KVM_CAP_SET_GUEST_DEBUG:
570 case KVM_CAP_S390_DIAG318:
573 case KVM_CAP_SET_GUEST_DEBUG2:
574 r = KVM_GUESTDBG_VALID_MASK;
576 case KVM_CAP_S390_HPAGE_1M:
578 if (hpage && !kvm_is_ucontrol(kvm))
581 case KVM_CAP_S390_MEM_OP:
584 case KVM_CAP_NR_VCPUS:
585 case KVM_CAP_MAX_VCPUS:
586 case KVM_CAP_MAX_VCPU_ID:
/* Start from the basic SCA slot count; ESCA raises it when available. */
587 r = KVM_S390_BSCA_CPU_SLOTS;
588 if (!kvm_s390_use_sca_entries())
590 else if (sclp.has_esca && sclp.has_64bscao)
591 r = KVM_S390_ESCA_CPU_SLOTS;
593 case KVM_CAP_S390_COW:
594 r = MACHINE_HAS_ESOP;
596 case KVM_CAP_S390_VECTOR_REGISTERS:
599 case KVM_CAP_S390_RI:
600 r = test_facility(64);
602 case KVM_CAP_S390_GS:
603 r = test_facility(133);
605 case KVM_CAP_S390_BPB:
606 r = test_facility(82);
608 case KVM_CAP_S390_PROTECTED:
609 r = is_prot_virt_host();
/*
 * Transfer dirty information for @memslot from the gmap into KVM's dirty
 * log. The slot is walked in steps of _PAGE_ENTRIES guest frames; for each
 * step the dirty bits are collected with gmap_sync_dirty_log_pmd() into a
 * local bitmap and every set bit is reported via mark_page_dirty().
 * Steps whose host address is an error HVA are checked for explicitly, and
 * a pending fatal signal for the current task is checked in the loop.
 */
617 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
620 gfn_t cur_gfn, last_gfn;
621 unsigned long gaddr, vmaddr;
622 struct gmap *gmap = kvm->arch.gmap;
623 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
625 /* Loop over all guest segments */
626 cur_gfn = memslot->base_gfn;
627 last_gfn = memslot->base_gfn + memslot->npages;
628 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
629 gaddr = gfn_to_gpa(cur_gfn);
630 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
631 if (kvm_is_error_hva(vmaddr))
634 bitmap_zero(bitmap, _PAGE_ENTRIES);
635 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
636 for (i = 0; i < _PAGE_ENTRIES; i++) {
637 if (test_bit(i, bitmap))
638 mark_page_dirty(kvm, cur_gfn + i);
641 if (fatal_signal_pending(current))
647 /* Section: vm related */
648 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
651 * Get (and clear) the dirty memory log for a memory slot.
/*
 * Fetch the dirty log for the slot named in @log via kvm_get_dirty_log()
 * and then zero the slot's dirty bitmap. Runs under kvm->slots_lock;
 * ucontrol VMs and slot numbers >= KVM_USER_MEM_SLOTS are checked for
 * before the log is read.
 */
653 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
654 struct kvm_dirty_log *log)
658 struct kvm_memory_slot *memslot;
661 if (kvm_is_ucontrol(kvm))
664 mutex_lock(&kvm->slots_lock);
667 if (log->slot >= KVM_USER_MEM_SLOTS)
670 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
674 /* Clear the dirty log */
676 n = kvm_dirty_bitmap_bytes(memslot);
677 memset(memslot->dirty_bitmap, 0, n);
681 mutex_unlock(&kvm->slots_lock);
/* Send a synchronous KVM_REQ_ICPT_OPEREXC request to every vCPU of @kvm. */
685 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
688 struct kvm_vcpu *vcpu;
690 kvm_for_each_vcpu(i, vcpu, kvm) {
691 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
/*
 * Enable the capability described by @cap for @kvm.
 *
 * IRQCHIP, USER_SIGP, USER_STSI and USER_INSTR0 set a flag in kvm->arch
 * (USER_INSTR0 additionally notifies all vCPUs). The facility-backed
 * capabilities (vector registers, RI, AIS, GS) and HPAGE_1M run under
 * kvm->lock and first check kvm->created_vcpus; on success the facility
 * bits are set in both the model's fac_mask and fac_list. Each of these
 * logs "(not available)" or "(success)" depending on the result r.
 */
695 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
703 case KVM_CAP_S390_IRQCHIP:
704 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
705 kvm->arch.use_irqchip = 1;
708 case KVM_CAP_S390_USER_SIGP:
709 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
710 kvm->arch.user_sigp = 1;
713 case KVM_CAP_S390_VECTOR_REGISTERS:
714 mutex_lock(&kvm->lock);
715 if (kvm->created_vcpus) {
717 } else if (MACHINE_HAS_VX) {
718 set_kvm_facility(kvm->arch.model.fac_mask, 129);
719 set_kvm_facility(kvm->arch.model.fac_list, 129);
/* Further facilities are enabled only when the host has them. */
720 if (test_facility(134)) {
721 set_kvm_facility(kvm->arch.model.fac_mask, 134);
722 set_kvm_facility(kvm->arch.model.fac_list, 134);
724 if (test_facility(135)) {
725 set_kvm_facility(kvm->arch.model.fac_mask, 135);
726 set_kvm_facility(kvm->arch.model.fac_list, 135);
728 if (test_facility(148)) {
729 set_kvm_facility(kvm->arch.model.fac_mask, 148);
730 set_kvm_facility(kvm->arch.model.fac_list, 148);
732 if (test_facility(152)) {
733 set_kvm_facility(kvm->arch.model.fac_mask, 152);
734 set_kvm_facility(kvm->arch.model.fac_list, 152);
736 if (test_facility(192)) {
737 set_kvm_facility(kvm->arch.model.fac_mask, 192);
738 set_kvm_facility(kvm->arch.model.fac_list, 192);
743 mutex_unlock(&kvm->lock);
744 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
745 r ? "(not available)" : "(success)");
747 case KVM_CAP_S390_RI:
749 mutex_lock(&kvm->lock);
750 if (kvm->created_vcpus) {
752 } else if (test_facility(64)) {
753 set_kvm_facility(kvm->arch.model.fac_mask, 64);
754 set_kvm_facility(kvm->arch.model.fac_list, 64);
757 mutex_unlock(&kvm->lock);
758 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
759 r ? "(not available)" : "(success)");
761 case KVM_CAP_S390_AIS:
762 mutex_lock(&kvm->lock);
763 if (kvm->created_vcpus) {
766 set_kvm_facility(kvm->arch.model.fac_mask, 72);
767 set_kvm_facility(kvm->arch.model.fac_list, 72);
770 mutex_unlock(&kvm->lock);
771 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
772 r ? "(not available)" : "(success)");
774 case KVM_CAP_S390_GS:
776 mutex_lock(&kvm->lock);
777 if (kvm->created_vcpus) {
779 } else if (test_facility(133)) {
780 set_kvm_facility(kvm->arch.model.fac_mask, 133);
781 set_kvm_facility(kvm->arch.model.fac_list, 133);
784 mutex_unlock(&kvm->lock);
785 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
786 r ? "(not available)" : "(success)");
788 case KVM_CAP_S390_HPAGE_1M:
789 mutex_lock(&kvm->lock);
790 if (kvm->created_vcpus)
792 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
796 mmap_write_lock(kvm->mm);
797 kvm->mm->context.allow_gmap_hpage_1m = 1;
798 mmap_write_unlock(kvm->mm);
800 * We might have to create fake 4k page
801 * tables. To avoid that the hardware works on
802 * stale PGSTEs, we emulate these instructions.
804 kvm->arch.use_skf = 0;
805 kvm->arch.use_pfmfi = 0;
807 mutex_unlock(&kvm->lock);
808 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
809 r ? "(not available)" : "(success)");
811 case KVM_CAP_S390_USER_STSI:
812 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
813 kvm->arch.user_stsi = 1;
816 case KVM_CAP_S390_USER_INSTR0:
817 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
818 kvm->arch.user_instr0 = 1;
819 icpt_operexc_on_all_vcpus(kvm);
/*
 * Read a memory-control attribute. For KVM_S390_VM_MEM_LIMIT_SIZE the
 * current kvm->arch.mem_limit is logged and written to the u64 that
 * attr->addr points to in user space.
 */
829 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
833 switch (attr->attr) {
834 case KVM_S390_VM_MEM_LIMIT_SIZE:
836 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
837 kvm->arch.mem_limit);
838 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
/*
 * Write a memory-control attribute:
 *  - KVM_S390_VM_MEM_ENABLE_CMMA: under kvm->lock, after checking
 *    created_vcpus and allow_gmap_hpage_1m, set use_cmma and clear
 *    use_pfmfi.
 *  - KVM_S390_VM_MEM_CLR_CMMA: reset the CMMA state of the gmap's mm
 *    while holding kvm->lock and the kvm->srcu read lock.
 *  - KVM_S390_VM_MEM_LIMIT_SIZE: read the new limit from user space,
 *    compare it with the current limit and, if no vCPUs exist yet,
 *    create a new gmap with that limit and swap it in for the old one.
 */
848 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
852 switch (attr->attr) {
853 case KVM_S390_VM_MEM_ENABLE_CMMA:
858 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
859 mutex_lock(&kvm->lock);
860 if (kvm->created_vcpus)
862 else if (kvm->mm->context.allow_gmap_hpage_1m)
865 kvm->arch.use_cmma = 1;
866 /* Not compatible with cmma. */
867 kvm->arch.use_pfmfi = 0;
870 mutex_unlock(&kvm->lock);
872 case KVM_S390_VM_MEM_CLR_CMMA:
877 if (!kvm->arch.use_cmma)
880 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
881 mutex_lock(&kvm->lock);
882 idx = srcu_read_lock(&kvm->srcu);
883 s390_reset_cmma(kvm->arch.gmap->mm);
884 srcu_read_unlock(&kvm->srcu, idx);
885 mutex_unlock(&kvm->lock);
888 case KVM_S390_VM_MEM_LIMIT_SIZE: {
889 unsigned long new_limit;
891 if (kvm_is_ucontrol(kvm))
894 if (get_user(new_limit, (u64 __user *)attr->addr))
897 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
898 new_limit > kvm->arch.mem_limit)
904 /* gmap_create takes last usable address */
905 if (new_limit != KVM_S390_NO_MEM_LIMIT)
909 mutex_lock(&kvm->lock);
910 if (!kvm->created_vcpus) {
911 /* gmap_create will round the limit up */
912 struct gmap *new = gmap_create(current->mm, new_limit);
917 gmap_remove(kvm->arch.gmap);
919 kvm->arch.gmap = new;
923 mutex_unlock(&kvm->lock);
924 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
925 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
926 (void *) kvm->arch.gmap->asce);
936 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
/*
 * Re-run the crypto setup on every vCPU of @kvm. All vCPUs are blocked
 * for the duration; each one gets kvm_s390_vcpu_crypto_setup() followed by
 * a synchronous KVM_REQ_VSIE_RESTART request, then all are unblocked.
 */
938 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
940 struct kvm_vcpu *vcpu;
943 kvm_s390_vcpu_block_all(kvm);
945 kvm_for_each_vcpu(i, vcpu, kvm) {
946 kvm_s390_vcpu_crypto_setup(vcpu);
947 /* recreate the shadow crycb by leaving the VSIE handler */
948 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
951 kvm_s390_vcpu_unblock_all(kvm);
/*
 * Change the crypto configuration of @kvm according to attr->attr, under
 * kvm->lock. The AES/DEA key-wrapping cases first test guest facility 76
 * and then set or clear crypto.aes_kw / crypto.dea_kw (the disable cases
 * also zero the matching wrapping key mask in the crycb). The APIE cases
 * first test ap_instructions_available() and then set or clear
 * crypto.apie. Every failed precondition drops the lock before leaving.
 * On the common path all vCPUs are refreshed through
 * kvm_s390_vcpu_crypto_reset_all() before the lock is released.
 */
954 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
956 mutex_lock(&kvm->lock);
957 switch (attr->attr) {
958 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
959 if (!test_kvm_facility(kvm, 76)) {
960 mutex_unlock(&kvm->lock);
964 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
965 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
966 kvm->arch.crypto.aes_kw = 1;
967 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
969 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
970 if (!test_kvm_facility(kvm, 76)) {
971 mutex_unlock(&kvm->lock);
975 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
976 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
977 kvm->arch.crypto.dea_kw = 1;
978 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
980 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
981 if (!test_kvm_facility(kvm, 76)) {
982 mutex_unlock(&kvm->lock);
985 kvm->arch.crypto.aes_kw = 0;
986 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
987 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
988 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
990 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
991 if (!test_kvm_facility(kvm, 76)) {
992 mutex_unlock(&kvm->lock);
995 kvm->arch.crypto.dea_kw = 0;
996 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
997 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
998 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
1000 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1001 if (!ap_instructions_available()) {
1002 mutex_unlock(&kvm->lock);
1005 kvm->arch.crypto.apie = 1;
1007 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1008 if (!ap_instructions_available()) {
1009 mutex_unlock(&kvm->lock);
1012 kvm->arch.crypto.apie = 0;
1015 mutex_unlock(&kvm->lock);
1019 kvm_s390_vcpu_crypto_reset_all(kvm);
1020 mutex_unlock(&kvm->lock);
/* Issue the synchronous request @req to every vCPU of @kvm. */
1024 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1027 struct kvm_vcpu *vcpu;
1029 kvm_for_each_vcpu(cx, vcpu, kvm)
1030 kvm_s390_sync_request(req, vcpu);
1034 * Must be called with kvm->srcu held to avoid races on memslots, and with
1035 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
/*
 * Put @kvm into migration mode. Already-enabled migration mode and a VM
 * without used memslots are checked for first. Without CMMA only
 * migration_mode is set. With CMMA each used memslot's dirty_bitmap is
 * checked, its second dirty bitmap is filled with 0xff, the sum of all
 * slot page counts is stored in cmma_dirty_pages, migration_mode is set
 * and KVM_REQ_START_MIGRATION is broadcast to all vCPUs.
 */
1037 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1039 struct kvm_memory_slot *ms;
1040 struct kvm_memslots *slots;
1041 unsigned long ram_pages = 0;
1044 /* migration mode already enabled */
1045 if (kvm->arch.migration_mode)
1047 slots = kvm_memslots(kvm);
1048 if (!slots || !slots->used_slots)
1051 if (!kvm->arch.use_cmma) {
1052 kvm->arch.migration_mode = 1;
1055 /* mark all the pages in active slots as dirty */
1056 for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1057 ms = slots->memslots + slotnr;
1058 if (!ms->dirty_bitmap)
1061 * The second half of the bitmap is only used on x86,
1062 * and would be wasted otherwise, so we put it to good
1063 * use here to keep track of the state of the storage
1066 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1067 ram_pages += ms->npages;
1069 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1070 kvm->arch.migration_mode = 1;
1071 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1076 * Must be called with kvm->slots_lock to avoid races with ourselves and
1077 * kvm_s390_vm_start_migration.
/*
 * Take @kvm out of migration mode: clear migration_mode and, when CMMA is
 * in use, broadcast KVM_REQ_STOP_MIGRATION to all vCPUs.
 */
1079 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1081 /* migration mode already disabled */
1082 if (!kvm->arch.migration_mode)
1084 kvm->arch.migration_mode = 0;
1085 if (kvm->arch.use_cmma)
1086 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
/*
 * Start or stop migration mode depending on attr->attr, holding
 * kvm->slots_lock around the call to the start/stop helper.
 */
1090 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1091 struct kvm_device_attr *attr)
1095 mutex_lock(&kvm->slots_lock);
1096 switch (attr->attr) {
1097 case KVM_S390_VM_MIGRATION_START:
1098 res = kvm_s390_vm_start_migration(kvm);
1100 case KVM_S390_VM_MIGRATION_STOP:
1101 res = kvm_s390_vm_stop_migration(kvm);
1106 mutex_unlock(&kvm->slots_lock);
/*
 * Report the migration mode: for KVM_S390_VM_MIGRATION_STATUS the current
 * kvm->arch.migration_mode is copied, as a u64, to the user buffer at
 * attr->addr.
 */
1111 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1112 struct kvm_device_attr *attr)
1114 u64 mig = kvm->arch.migration_mode;
1116 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1119 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
/*
 * Set the guest TOD clock (epoch index and TOD base) from the
 * struct kvm_s390_vm_tod_clock that attr->addr points to in user space.
 * A non-zero epoch index is checked against guest facility 139 before the
 * clock is set.
 */
1124 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1126 struct kvm_s390_vm_tod_clock gtod;
1128 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1131 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1133 kvm_s390_set_tod_clock(kvm, &gtod);
1135 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1136 gtod.epoch_idx, gtod.tod);
/*
 * Handle a write of the TOD extension (high part): the value is read from
 * the user buffer at attr->addr into gtod_high and logged.
 */
1141 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1145 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1151 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
/*
 * Set the guest TOD base: only the tod member of a zero-initialised
 * struct kvm_s390_vm_tod_clock is filled from the user buffer at
 * attr->addr, so the epoch index passed to kvm_s390_set_tod_clock() is 0.
 */
1156 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1158 struct kvm_s390_vm_tod_clock gtod = { 0 };
1160 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1164 kvm_s390_set_tod_clock(kvm, &gtod);
1165 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
/*
 * Dispatch a TOD write to the handler matching attr->attr:
 * KVM_S390_VM_TOD_EXT, KVM_S390_VM_TOD_HIGH or KVM_S390_VM_TOD_LOW.
 */
1169 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1176 switch (attr->attr) {
1177 case KVM_S390_VM_TOD_EXT:
1178 ret = kvm_s390_set_tod_ext(kvm, attr);
1180 case KVM_S390_VM_TOD_HIGH:
1181 ret = kvm_s390_set_tod_high(kvm, attr);
1183 case KVM_S390_VM_TOD_LOW:
1184 ret = kvm_s390_set_tod_low(kvm, attr);
/*
 * Compute the current guest TOD clock into @gtod. The TOD base is the
 * host clock (store_tod_clock_ext()) plus kvm->arch.epoch. The epoch index
 * stays 0 unless guest facility 139 is present; then it is the host epoch
 * index plus kvm->arch.epdx, incremented by one when the TOD base addition
 * wrapped around (result smaller than the host TOD).
 */
1193 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1194 struct kvm_s390_vm_tod_clock *gtod)
1196 union tod_clock clk;
1200 store_tod_clock_ext(&clk);
1202 gtod->tod = clk.tod + kvm->arch.epoch;
1203 gtod->epoch_idx = 0;
1204 if (test_kvm_facility(kvm, 139)) {
1205 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1206 if (gtod->tod < clk.tod)
1207 gtod->epoch_idx += 1;
/*
 * Return the guest TOD clock (epoch index and TOD base) to user space.
 * The structure is zeroed before it is filled by kvm_s390_get_tod_clock()
 * and then copied to the buffer at attr->addr.
 */
1213 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1215 struct kvm_s390_vm_tod_clock gtod;
1217 memset(&gtod, 0, sizeof(gtod));
1218 kvm_s390_get_tod_clock(kvm, &gtod);
1219 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1222 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1223 gtod.epoch_idx, gtod.tod);
/*
 * Return the TOD extension (high part) to user space: gtod_high is copied
 * to the buffer at attr->addr and logged.
 */
1227 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1231 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1234 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
/*
 * Return the guest TOD base to user space: the value obtained from
 * kvm_s390_get_tod_clock_fast() is copied to the buffer at attr->addr and
 * logged.
 */
1239 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1243 gtod = kvm_s390_get_tod_clock_fast(kvm);
1244 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1246 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
/*
 * Dispatch a KVM_S390_VM_TOD get request on attr->attr to the EXT, HIGH or
 * LOW getter and keep that getter's result in ret.
 *
 * NOTE(review): anything done before the switch, the default case and the
 * return statement are not visible in this view.
 */
1251 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1258 switch (attr->attr) {
1259 case KVM_S390_VM_TOD_EXT:
1260 ret = kvm_s390_get_tod_ext(kvm, attr);
1262 case KVM_S390_VM_TOD_HIGH:
1263 ret = kvm_s390_get_tod_high(kvm, attr);
1265 case KVM_S390_VM_TOD_LOW:
1266 ret = kvm_s390_get_tod_low(kvm, attr);
/*
 * Install a user-supplied processor model (KVM_S390_VM_CPU_PROCESSOR).
 *
 * Runs under kvm->lock. A kzalloc()ed struct kvm_s390_vm_cpu_processor is
 * filled from the user address attr->addr; when that copy succeeds, the
 * cpuid, the IBC value and the facility list are written to
 * kvm->arch.model and traced.
 *
 * When both lowest_ibc and proc->ibc are non-zero the IBC value is limited
 * to the range reported in sclp.ibc: values above unblocked_ibc become
 * unblocked_ibc and values below lowest_ibc become lowest_ibc.
 *
 * NOTE(review): the body of the kvm->created_vcpus check, the allocation
 * failure path, the freeing of proc and the return value are not visible in
 * this view.
 */
1275 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1277 struct kvm_s390_vm_cpu_processor *proc;
1278 u16 lowest_ibc, unblocked_ibc;
1281 mutex_lock(&kvm->lock);
1282 if (kvm->created_vcpus) {
1286 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1291 if (!copy_from_user(proc, (void __user *)attr->addr,
1293 kvm->arch.model.cpuid = proc->cpuid;
/* sclp.ibc holds two 12-bit fields: the lowest IBC (shifted right by 16) and the unblocked IBC (low 12 bits). */
1294 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1295 unblocked_ibc = sclp.ibc & 0xfff;
1296 if (lowest_ibc && proc->ibc) {
1297 if (proc->ibc > unblocked_ibc)
1298 kvm->arch.model.ibc = unblocked_ibc;
1299 else if (proc->ibc < lowest_ibc)
1300 kvm->arch.model.ibc = lowest_ibc;
1302 kvm->arch.model.ibc = proc->ibc;
/* The facility list is taken over in full (S390_ARCH_FAC_LIST_SIZE_BYTE bytes). */
1304 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1305 S390_ARCH_FAC_LIST_SIZE_BYTE);
1306 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1307 kvm->arch.model.ibc,
1308 kvm->arch.model.cpuid);
1309 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1310 kvm->arch.model.fac_list[0],
1311 kvm->arch.model.fac_list[1],
1312 kvm->arch.model.fac_list[2]);
1317 mutex_unlock(&kvm->lock);
/*
 * Set the guest CPU feature bitmap (KVM_S390_VM_CPU_PROCESSOR_FEAT).
 *
 * The request is copied from attr->addr into a stack copy and tested with
 * bitmap_subset() against kvm_s390_available_cpu_feat before kvm->lock is
 * taken. Under the lock, kvm->created_vcpus is checked and the bitmap is
 * copied into kvm->arch.cpu_feat.
 *
 * NOTE(review): the error codes returned by the three checks, the arguments
 * of the final VM_EVENT() and the return are not visible in this view.
 */
1321 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1322 struct kvm_device_attr *attr)
1324 struct kvm_s390_vm_cpu_feat data;
1326 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
/* The requested bits are compared against the set in kvm_s390_available_cpu_feat. */
1328 if (!bitmap_subset((unsigned long *) data.feat,
1329 kvm_s390_available_cpu_feat,
1330 KVM_S390_VM_CPU_FEAT_NR_BITS))
1333 mutex_lock(&kvm->lock);
1334 if (kvm->created_vcpus) {
1335 mutex_unlock(&kvm->lock);
1338 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1339 KVM_S390_VM_CPU_FEAT_NR_BITS);
1340 mutex_unlock(&kvm->lock);
1341 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Set the guest subfunction blocks (KVM_S390_VM_CPU_PROCESSOR_SUBFUNC).
 *
 * Under kvm->lock, kvm->created_vcpus is checked and a struct
 * kvm_s390_vm_cpu_subfunc is copied from the user address attr->addr
 * straight into kvm->arch.model.subfuncs, without an intermediate buffer.
 * The lock is released before the new contents are traced, one VM_EVENT()
 * per instruction block.
 *
 * NOTE(review): the return statements of the two failure branches and of
 * the success path are not visible in this view.
 */
1348 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1349 struct kvm_device_attr *attr)
1351 mutex_lock(&kvm->lock);
1352 if (kvm->created_vcpus) {
1353 mutex_unlock(&kvm->lock);
1357 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1358 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1359 mutex_unlock(&kvm->lock);
1362 mutex_unlock(&kvm->lock);
/* Trace each block as unsigned long words: four for PLO, SORTL and DFLTCC, two for every other block. */
1364 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1365 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1366 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1367 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1368 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1369 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1370 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1371 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1372 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1373 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1374 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1375 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1376 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1377 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1378 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1379 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1380 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1381 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1382 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1383 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1384 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1385 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1386 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1387 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1388 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1389 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1390 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1391 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1392 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1393 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1394 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1395 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1396 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1397 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1398 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1399 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1400 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1401 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1402 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1403 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1404 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1405 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1406 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1407 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1408 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1409 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1410 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1411 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1412 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1413 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1414 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1415 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1416 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1417 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1418 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1419 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1420 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
/*
 * Dispatch a KVM_S390_VM_CPU_MODEL set request on attr->attr to the
 * processor, processor-feature or processor-subfunction setter and keep
 * the result in ret.
 *
 * NOTE(review): the initial value of ret, the break statements, any default
 * case and the return are not visible in this view.
 */
1425 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1429 switch (attr->attr) {
1430 case KVM_S390_VM_CPU_PROCESSOR:
1431 ret = kvm_s390_set_processor(kvm, attr);
1433 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1434 ret = kvm_s390_set_processor_feat(kvm, attr);
1436 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1437 ret = kvm_s390_set_processor_subfunc(kvm, attr);
/*
 * Report the configured guest processor model (KVM_S390_VM_CPU_PROCESSOR).
 *
 * A kzalloc()ed struct kvm_s390_vm_cpu_processor is filled with the cpuid,
 * the IBC value and the facility list from kvm->arch.model, the values are
 * traced, and the structure is copied to the user address attr->addr.
 *
 * NOTE(review): the allocation failure path, the copy_to_user() failure
 * branch, the freeing of proc and the return are not visible in this view.
 */
1443 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1445 struct kvm_s390_vm_cpu_processor *proc;
1448 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1453 proc->cpuid = kvm->arch.model.cpuid;
1454 proc->ibc = kvm->arch.model.ibc;
1455 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1456 S390_ARCH_FAC_LIST_SIZE_BYTE);
1457 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458 kvm->arch.model.ibc,
1459 kvm->arch.model.cpuid);
1460 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461 kvm->arch.model.fac_list[0],
1462 kvm->arch.model.fac_list[1],
1463 kvm->arch.model.fac_list[2]);
1464 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/*
 * Report the host machine model (KVM_S390_VM_CPU_MACHINE).
 *
 * A kzalloc()ed struct kvm_s390_vm_cpu_machine is filled with the CPU id
 * from get_cpu_id(), sclp.ibc, the facility mask from
 * kvm->arch.model.fac_mask and the facility list from stfle_fac_list, and
 * is then copied to the user address attr->addr.
 *
 * NOTE(review): the allocation failure path, the copy_to_user() failure
 * branch, the freeing of mach, the arguments of the facmask and faclist
 * traces and the return are not visible in this view.
 */
1471 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1473 struct kvm_s390_vm_cpu_machine *mach;
1476 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1481 get_cpu_id((struct cpuid *) &mach->cpuid);
1482 mach->ibc = sclp.ibc;
1483 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1484 S390_ARCH_FAC_LIST_SIZE_BYTE);
1485 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1486 sizeof(stfle_fac_list));
/* NOTE(review): this trace is labelled host ibc and host cpuid but prints kvm->arch.model.ibc and kvm->arch.model.cpuid, not mach->ibc and mach->cpuid - confirm which values are intended. */
1487 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1488 kvm->arch.model.ibc,
1489 kvm->arch.model.cpuid);
1490 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1494 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1498 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
/*
 * Report the guest CPU feature bitmap (KVM_S390_VM_CPU_PROCESSOR_FEAT).
 *
 * KVM_S390_VM_CPU_FEAT_NR_BITS bits of kvm->arch.cpu_feat are copied into a
 * stack struct kvm_s390_vm_cpu_feat, which is then copied to the user
 * address attr->addr.
 *
 * NOTE(review): the copy_to_user() failure branch, the arguments of the
 * VM_EVENT() and the return are not visible in this view.
 */
1505 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1506 struct kvm_device_attr *attr)
1508 struct kvm_s390_vm_cpu_feat data;
1510 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1511 KVM_S390_VM_CPU_FEAT_NR_BITS);
1512 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1514 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Report the host CPU feature bitmap (KVM_S390_VM_CPU_MACHINE_FEAT).
 *
 * KVM_S390_VM_CPU_FEAT_NR_BITS bits of kvm_s390_available_cpu_feat are
 * copied into a stack struct kvm_s390_vm_cpu_feat, which is then copied to
 * the user address attr->addr.
 *
 * NOTE(review): the copy_to_user() failure branch, the arguments of the
 * VM_EVENT() and the return are not visible in this view.
 */
1521 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1522 struct kvm_device_attr *attr)
1524 struct kvm_s390_vm_cpu_feat data;
1526 bitmap_copy((unsigned long *) data.feat,
1527 kvm_s390_available_cpu_feat,
1528 KVM_S390_VM_CPU_FEAT_NR_BITS);
1529 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1531 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * Report the guest subfunction blocks
 * (KVM_S390_VM_CPU_PROCESSOR_SUBFUNC).
 *
 * kvm->arch.model.subfuncs is copied to the user address attr->addr as a
 * struct kvm_s390_vm_cpu_subfunc, then every instruction block is traced.
 *
 * NOTE(review): the copy_to_user() failure branch and the return are not
 * visible in this view.
 */
1538 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1539 struct kvm_device_attr *attr)
1541 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1542 sizeof(struct kvm_s390_vm_cpu_subfunc)))
/* Trace each block as unsigned long words: four for PLO, SORTL and DFLTCC, two for every other block. */
1545 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1546 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1547 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1548 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1549 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1550 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1551 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1552 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1553 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1556 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1557 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1558 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1559 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1560 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1561 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1562 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1563 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1564 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1565 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1566 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1567 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1568 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1569 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1570 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1571 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1572 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1573 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1574 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1575 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1576 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1577 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1578 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1579 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1580 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1581 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1582 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1583 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1584 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1585 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1586 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1587 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1588 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1589 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1590 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1591 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1592 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1593 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1594 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1595 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1596 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1597 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1598 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1599 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1600 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1601 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
/*
 * Report the host subfunction blocks (KVM_S390_VM_CPU_MACHINE_SUBFUNC).
 *
 * The global kvm_s390_available_subfunc is copied to the user address
 * attr->addr as a struct kvm_s390_vm_cpu_subfunc, then every instruction
 * block is traced.
 *
 * NOTE(review): the copy_to_user() failure branch and the return are not
 * visible in this view.
 */
1606 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1607 struct kvm_device_attr *attr)
1609 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1610 sizeof(struct kvm_s390_vm_cpu_subfunc)))
/* Trace each block as unsigned long words: four for PLO, SORTL and DFLTCC, two for every other block. */
1613 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1614 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1615 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1616 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1617 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1618 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1619 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1620 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1621 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1622 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1623 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1624 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1625 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1626 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1627 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1628 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1629 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1630 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1631 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1632 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1633 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1634 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1635 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1636 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1637 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1638 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1639 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1640 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1641 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1642 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1643 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1644 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1645 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1646 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1647 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1648 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1649 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1650 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1651 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1652 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1653 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1654 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1655 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1656 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1657 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1658 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1659 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1660 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1661 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1662 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1663 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1664 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1665 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1666 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1667 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1668 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1669 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
/*
 * Dispatch a KVM_S390_VM_CPU_MODEL get request on attr->attr. Six
 * attributes are handled: the processor and machine variants of the model,
 * the feature bitmap and the subfunction blocks. The selected getter's
 * result is kept in ret.
 *
 * NOTE(review): the initial value of ret, the break statements, any default
 * case and the return are not visible in this view.
 */
1674 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1678 switch (attr->attr) {
1679 case KVM_S390_VM_CPU_PROCESSOR:
1680 ret = kvm_s390_get_processor(kvm, attr);
1682 case KVM_S390_VM_CPU_MACHINE:
1683 ret = kvm_s390_get_machine(kvm, attr);
1685 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1686 ret = kvm_s390_get_processor_feat(kvm, attr);
1688 case KVM_S390_VM_CPU_MACHINE_FEAT:
1689 ret = kvm_s390_get_machine_feat(kvm, attr);
1691 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1692 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1694 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1695 ret = kvm_s390_get_machine_subfunc(kvm, attr);
/*
 * Top-level handler for setting a VM attribute; called from
 * kvm_arch_vm_ioctl() for KVM_SET_DEVICE_ATTR.
 *
 * Dispatches on attr->group to the memory-control, TOD, CPU-model, crypto
 * or migration setter and keeps that handler's result in ret.
 *
 * NOTE(review): the break statements, the default case and the return are
 * not visible in this view.
 */
1701 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1705 switch (attr->group) {
1706 case KVM_S390_VM_MEM_CTRL:
1707 ret = kvm_s390_set_mem_control(kvm, attr);
1709 case KVM_S390_VM_TOD:
1710 ret = kvm_s390_set_tod(kvm, attr);
1712 case KVM_S390_VM_CPU_MODEL:
1713 ret = kvm_s390_set_cpu_model(kvm, attr);
1715 case KVM_S390_VM_CRYPTO:
1716 ret = kvm_s390_vm_set_crypto(kvm, attr);
1718 case KVM_S390_VM_MIGRATION:
1719 ret = kvm_s390_vm_set_migration(kvm, attr);
/*
 * Top-level handler for reading a VM attribute; called from
 * kvm_arch_vm_ioctl() for KVM_GET_DEVICE_ATTR.
 *
 * Dispatches on attr->group to the memory-control, TOD, CPU-model or
 * migration getter and keeps that handler's result in ret. No
 * KVM_S390_VM_CRYPTO case is visible here, unlike in the set handler.
 *
 * NOTE(review): the break statements, the default case and the return are
 * not visible in this view.
 */
1729 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1733 switch (attr->group) {
1734 case KVM_S390_VM_MEM_CTRL:
1735 ret = kvm_s390_get_mem_control(kvm, attr);
1737 case KVM_S390_VM_TOD:
1738 ret = kvm_s390_get_tod(kvm, attr);
1740 case KVM_S390_VM_CPU_MODEL:
1741 ret = kvm_s390_get_cpu_model(kvm, attr);
1743 case KVM_S390_VM_MIGRATION:
1744 ret = kvm_s390_vm_get_migration(kvm, attr);
/*
 * Report whether a VM attribute is supported; called from
 * kvm_arch_vm_ioctl() for KVM_HAS_DEVICE_ATTR.
 *
 * The outer switch is on attr->group, the inner switches on attr->attr.
 * Results that are visible here:
 *  - KVM_S390_VM_MEM_ENABLE_CMMA and KVM_S390_VM_MEM_CLR_CMMA: 0 if
 *    sclp.has_cmma is set, -ENXIO otherwise;
 *  - KVM_S390_VM_CRYPTO_ENABLE_APIE and KVM_S390_VM_CRYPTO_DISABLE_APIE:
 *    0 if ap_instructions_available() is non-zero, -ENXIO otherwise.
 *
 * NOTE(review): the results for every other listed attribute, the default
 * cases and the return are not visible in this view.
 */
1754 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1758 switch (attr->group) {
1759 case KVM_S390_VM_MEM_CTRL:
1760 switch (attr->attr) {
1761 case KVM_S390_VM_MEM_ENABLE_CMMA:
1762 case KVM_S390_VM_MEM_CLR_CMMA:
1763 ret = sclp.has_cmma ? 0 : -ENXIO;
1765 case KVM_S390_VM_MEM_LIMIT_SIZE:
1773 case KVM_S390_VM_TOD:
1774 switch (attr->attr) {
1775 case KVM_S390_VM_TOD_LOW:
1776 case KVM_S390_VM_TOD_HIGH:
1784 case KVM_S390_VM_CPU_MODEL:
1785 switch (attr->attr) {
1786 case KVM_S390_VM_CPU_PROCESSOR:
1787 case KVM_S390_VM_CPU_MACHINE:
1788 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1789 case KVM_S390_VM_CPU_MACHINE_FEAT:
1790 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1791 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1799 case KVM_S390_VM_CRYPTO:
1800 switch (attr->attr) {
1801 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1802 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1803 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1804 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1807 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1808 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1809 ret = ap_instructions_available() ? 0 : -ENXIO;
1816 case KVM_S390_VM_MIGRATION:
/*
 * Read guest storage keys (KVM_S390_GET_SKEYS).
 *
 * Returns KVM_S390_GET_SKEYS_NONE when mm_uses_skeys(current->mm) is false.
 * Otherwise args->count must lie between 1 and KVM_S390_SKEYS_MAX; one key
 * per guest frame, starting at args->start_gfn, is read with
 * get_guest_storage_key() into a kvmalloc_array() buffer, and the buffer is
 * copied to the user address args->skeydata_addr.
 *
 * NOTE(review): the error codes of the flag, range, allocation and
 * translation checks, the loop exit on failure, the freeing of keys and the
 * final return are not visible in this view.
 */
1827 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1831 int srcu_idx, i, r = 0;
1833 if (args->flags != 0)
1836 /* Is this guest using storage keys? */
1837 if (!mm_uses_skeys(current->mm))
1838 return KVM_S390_GET_SKEYS_NONE;
1840 /* Enforce sane limit on memory allocation */
1841 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1844 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
/* The mmap read lock of current->mm and kvm->srcu are held while gfns are translated and keys are read. */
1848 mmap_read_lock(current->mm);
1849 srcu_idx = srcu_read_lock(&kvm->srcu);
1850 for (i = 0; i < args->count; i++) {
1851 hva = gfn_to_hva(kvm, args->start_gfn + i);
1852 if (kvm_is_error_hva(hva)) {
1857 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1861 srcu_read_unlock(&kvm->srcu, srcu_idx);
1862 mmap_read_unlock(current->mm);
1865 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1866 sizeof(uint8_t) * args->count);
/*
 * Write guest storage keys (KVM_S390_SET_SKEYS).
 *
 * args->count must lie between 1 and KVM_S390_SKEYS_MAX. The keys are
 * copied from the user address args->skeydata_addr into a kvmalloc_array()
 * buffer, storage key handling is enabled with s390_enable_skey(), and the
 * keys are applied one guest frame at a time, starting at args->start_gfn,
 * with set_guest_storage_key() while the mmap read lock of current->mm and
 * kvm->srcu are held. Keys with bit 0x01 set are tested for separately
 * (reserved bit).
 *
 * NOTE(review): the initial value and increment of i, the condition under
 * which fixup_user_fault() is called, all error codes, the freeing of keys
 * and the return are not visible in this view.
 */
1875 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1879 int srcu_idx, i, r = 0;
1882 if (args->flags != 0)
1885 /* Enforce sane limit on memory allocation */
1886 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1889 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1893 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1894 sizeof(uint8_t) * args->count);
1900 /* Enable storage key handling for the guest */
1901 r = s390_enable_skey();
1906 mmap_read_lock(current->mm);
1907 srcu_idx = srcu_read_lock(&kvm->srcu);
1908 while (i < args->count) {
1910 hva = gfn_to_hva(kvm, args->start_gfn + i);
1911 if (kvm_is_error_hva(hva)) {
1916 /* Lowest order bit is reserved */
1917 if (keys[i] & 0x01) {
1922 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1924 r = fixup_user_fault(current->mm, hva,
1925 FAULT_FLAG_WRITE, &unlocked);
1932 srcu_read_unlock(&kvm->srcu, srcu_idx);
1933 mmap_read_unlock(current->mm);
1940 * Base address and length must be sent at the start of each block, therefore
1941 * it's cheaper to send some clean data, as long as it's less than the size of
/*
 * Largest gap, in guest frames, that kvm_s390_get_cmma() bridges inside one
 * block: it stops once the next dirty page lies more than this many frames
 * past the current one. Evaluates to twice the size of a pointer.
 */
1944 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1945 /* for consistency */
/* Upper bound on the number of CMMA values handled per call: KVM_S390_SKEYS_MAX as a u32. */
1946 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1949 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1950 * address falls in a hole. In that case the index of one of the memslots
1951 * bordering the hole is returned.
/*
 * Visible steps: the slot cached in slots->lru_slot is tried first; if gfn
 * is not inside it, a binary search over memslots[0 .. used_slots) compares
 * gfn with each candidate's base_gfn. A search result at or beyond
 * used_slots is replaced by the last slot index (used_slots - 1). When gfn
 * really lies inside the slot that was found, lru_slot is updated to it.
 *
 * NOTE(review): the return statements and the bodies of the binary-search
 * branches are not visible in this view; the search direction suggests
 * memslots are sorted by descending base_gfn - confirm.
 */
1953 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1955 int start = 0, end = slots->used_slots;
1956 int slot = atomic_read(&slots->lru_slot);
1957 struct kvm_memory_slot *memslots = slots->memslots;
/* Check the most recently used slot first. */
1959 if (gfn >= memslots[slot].base_gfn &&
1960 gfn < memslots[slot].base_gfn + memslots[slot].npages)
1963 while (start < end) {
1964 slot = start + (end - start) / 2;
1966 if (gfn >= memslots[slot].base_gfn)
1972 if (start >= slots->used_slots)
1973 return slots->used_slots - 1;
1975 if (gfn >= memslots[start].base_gfn &&
1976 gfn < memslots[start].base_gfn + memslots[start].npages) {
1977 atomic_set(&slots->lru_slot, start);
/*
 * Read CMMA values for consecutive guest frames starting at
 * args->start_gfn.
 *
 * For each frame the host address is looked up with gfn_to_hva() and the
 * PGSTE value is read with get_pgste(); the byte stored in res[] is
 * (pgstev >> 24) & 0x43. args->count is used as the output index and is
 * incremented per stored value; the loop runs while it is below bufsize.
 * An invalid host address returns -EFAULT if nothing has been stored yet
 * and 0 otherwise.
 *
 * NOTE(review): the handling of a get_pgste() failure, the advance of
 * cur_gfn and the final return are not visible in this view.
 */
1983 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1984 u8 *res, unsigned long bufsize)
1986 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1989 while (args->count < bufsize) {
1990 hva = gfn_to_hva(kvm, cur_gfn);
1992 * We return an error if the first value was invalid, but we
1993 * return successfully if at least one value was copied.
1995 if (kvm_is_error_hva(hva))
1996 return args->count ? 0 : -EFAULT;
1997 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1999 res[args->count++] = (pgstev >> 24) & 0x43;
/*
 * Find the guest frame number of the next page whose bit is set in a
 * memslot's second dirty bitmap, searching from cur_gfn.
 *
 * The starting slot comes from gfn_to_memslot_approx(). The search uses
 * find_next_bit() on kvm_second_dirty_bitmap(ms); while nothing is found in
 * the current slot (ofs >= ms->npages) and slotidx is above 0, another slot
 * is searched from offset 0. The result is ms->base_gfn + ofs.
 *
 * NOTE(review): the statements that adjust slotidx and ofs (inside the
 * first if and inside the while loop) are not visible in this view, so the
 * exact wrap-around behaviour cannot be described from here.
 */
2006 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2007 unsigned long cur_gfn)
2009 int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2010 struct kvm_memory_slot *ms = slots->memslots + slotidx;
2011 unsigned long ofs = cur_gfn - ms->base_gfn;
2013 if (ms->base_gfn + ms->npages <= cur_gfn) {
2015 /* If we are above the highest slot, wrap around */
2017 slotidx = slots->used_slots - 1;
2019 ms = slots->memslots + slotidx;
2022 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2023 while ((slotidx > 0) && (ofs >= ms->npages)) {
2025 ms = slots->memslots + slotidx;
2026 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2028 return ms->base_gfn + ofs;
/*
 * Collect CMMA values for migration, starting at the first dirty page
 * found by kvm_s390_next_dirty_cmma() from args->start_gfn.
 *
 * args->start_gfn is rewritten to that first page. For every page visited
 * the bit in the second dirty bitmap is cleared, and
 * kvm->arch.cmma_dirty_pages is decremented only if the bit was set; the
 * stored byte is (pgstev >> 24) & 0x43, with args->count as output index.
 * The walk stops when the buffer is full, when the next dirty page is more
 * than KVM_S390_MAX_BIT_DISTANCE frames ahead, or when that page is at or
 * past mem_end or would not fit into the buffer any more.
 *
 * mem_end is computed from memslots[0] only (presumably the slot with the
 * highest addresses - confirm).
 *
 * NOTE(review): the early returns, the handling of a get_pgste() failure,
 * the increment of cur_gfn, the checks on the memslot returned by
 * gfn_to_memslot() and the final return are not visible in this view.
 */
2031 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2032 u8 *res, unsigned long bufsize)
2034 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2035 struct kvm_memslots *slots = kvm_memslots(kvm);
2036 struct kvm_memory_slot *ms;
2038 if (unlikely(!slots->used_slots))
2041 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2042 ms = gfn_to_memslot(kvm, cur_gfn);
2044 args->start_gfn = cur_gfn;
2047 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2048 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2050 while (args->count < bufsize) {
2051 hva = gfn_to_hva(kvm, cur_gfn);
2052 if (kvm_is_error_hva(hva))
2054 /* Decrement only if we actually flipped the bit to 0 */
2055 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2056 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2057 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2059 /* Save the value */
2060 res[args->count++] = (pgstev >> 24) & 0x43;
2061 /* If the next bit is too far away, stop. */
2062 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2064 /* If we reached the previous "next", find the next one */
2065 if (cur_gfn == next_gfn)
2066 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2067 /* Reached the end of memory or of the buffer, stop */
2068 if ((next_gfn >= mem_end) ||
2069 (next_gfn - args->start_gfn >= bufsize))
2072 /* Reached the end of the current memslot, take the next one. */
2073 if (cur_gfn - ms->base_gfn >= ms->npages) {
2074 ms = gfn_to_memslot(kvm, cur_gfn);
2083 * This function searches for the next page with dirty CMMA attributes, and
2084 * saves the attributes in the buffer up to either the end of the buffer or
2085 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2086 * no trailing clean bytes are saved.
2087 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2088 * output buffer will indicate 0 as length.
/*
 * Visible flow: the request is checked (kvm->arch.use_cmma, flags other
 * than KVM_S390_CMMA_PEEK, migration mode for non-peek requests); bufsize
 * is args->count limited to KVM_S390_CMMA_SIZE_MAX; *args is cleared when
 * there is nothing to report; otherwise a vmalloc() buffer is filled by
 * kvm_s390_peek_cmma() or kvm_s390_get_cmma() and args->count bytes of it
 * are copied to the user address args->values.
 *
 * NOTE(review): the error codes of the checks, the condition selecting the
 * peek or get helper, the freeing of values and the return are not visible
 * in this view.
 */
2090 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2091 struct kvm_s390_cmma_log *args)
2093 unsigned long bufsize;
2094 int srcu_idx, peek, ret;
2097 if (!kvm->arch.use_cmma)
2099 /* Invalid/unsupported flags were specified */
2100 if (args->flags & ~KVM_S390_CMMA_PEEK)
2102 /* Migration mode query, and we are not doing a migration */
2103 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2104 if (!peek && !kvm->arch.migration_mode)
2106 /* CMMA is disabled or was not used, or the buffer has length zero */
2107 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2108 if (!bufsize || !kvm->mm->context.uses_cmm) {
2109 memset(args, 0, sizeof(*args));
2112 /* We are not peeking, and there are no dirty pages */
2113 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2114 memset(args, 0, sizeof(*args));
2118 values = vmalloc(bufsize);
/* The helper calls below run under the mmap read lock of kvm->mm and kvm->srcu. */
2122 mmap_read_lock(kvm->mm);
2123 srcu_idx = srcu_read_lock(&kvm->srcu);
2125 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2127 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2128 srcu_read_unlock(&kvm->srcu, srcu_idx);
2129 mmap_read_unlock(kvm->mm);
/* args->remaining reports the dirty-page counter in migration mode; the assignment of 0 is presumably the else branch. */
2131 if (kvm->arch.migration_mode)
2132 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2134 args->remaining = 0;
2136 if (copy_to_user((void __user *)args->values, values, args->count))
2144 * This function sets the CMMA attributes for the given pages. If the input
2145 * buffer has zero length, no action is taken, otherwise the attributes are
2146 * set and the mm->context.uses_cmm flag is set.
/*
 * Visible flow: the request is checked (kvm->arch.use_cmma, flags must be
 * 0, args->count at most KVM_S390_CMMA_SIZE_MAX, a count of 0 handled
 * separately); args->count bytes are copied from the user address
 * args->values into a vmalloc() buffer; for each guest frame starting at
 * args->start_gfn the value is shifted left by 24 and written with
 * set_pgste_bits(), using a mask restricted to _PGSTE_GPS_USAGE_MASK and
 * _PGSTE_GPS_NODAT. Afterwards kvm->mm->context.uses_cmm is set if it was
 * not set already.
 *
 * NOTE(review): the error codes of the checks, the loads of pgstev and mask
 * from the buffer, the freeing of bits and the return are not visible in
 * this view.
 */
2148 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2149 const struct kvm_s390_cmma_log *args)
2151 unsigned long hva, mask, pgstev, i;
2153 int srcu_idx, r = 0;
2157 if (!kvm->arch.use_cmma)
2159 /* invalid/unsupported flags */
2160 if (args->flags != 0)
2162 /* Enforce sane limit on memory allocation */
2163 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2166 if (args->count == 0)
2169 bits = vmalloc(array_size(sizeof(*bits), args->count));
2173 r = copy_from_user(bits, (void __user *)args->values, args->count);
2179 mmap_read_lock(kvm->mm);
2180 srcu_idx = srcu_read_lock(&kvm->srcu);
2181 for (i = 0; i < args->count; i++) {
2182 hva = gfn_to_hva(kvm, args->start_gfn + i);
2183 if (kvm_is_error_hva(hva)) {
2189 pgstev = pgstev << 24;
2190 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2191 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2193 srcu_read_unlock(&kvm->srcu, srcu_idx);
2194 mmap_read_unlock(kvm->mm);
/* The uses_cmm flag is flipped under the mmap write lock, and only if it is not set yet. */
2196 if (!kvm->mm->context.uses_cmm) {
2197 mmap_write_lock(kvm->mm);
2198 kvm->mm->context.uses_cmm = 1;
2199 mmap_write_unlock(kvm->mm);
/*
 * Call kvm_s390_pv_destroy_cpu() for every vCPU of the VM, each under that
 * vCPU's mutex. The loop does not stop on failure; the branch taken on the
 * first failure (while ret is still 0) is where the first rc and rrc are
 * meant to be reported through rcp and rrcp, according to the comment
 * inside the function.
 *
 * NOTE(review): the local declarations, the body of the failure branch and
 * the return are not visible in this view.
 */
2206 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2208 struct kvm_vcpu *vcpu;
2214 * We ignore failures and try to destroy as many CPUs as possible.
2215 * At the same time we must not free the assigned resources when
2216 * this fails, as the ultravisor has still access to that memory.
2217 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2219 * We want to return the first failure rc and rrc, though.
2221 kvm_for_each_vcpu(i, vcpu, kvm) {
2222 mutex_lock(&vcpu->mutex);
2223 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2228 mutex_unlock(&vcpu->mutex);
/*
 * Call kvm_s390_pv_create_cpu() for every vCPU of the VM, each under that
 * vCPU's mutex, passing rc and rrc through. A call to
 * kvm_s390_cpus_from_pv() with dummy rc/rrc storage is visible after the
 * loop (presumably the rollback when a create failed - confirm).
 *
 * NOTE(review): the check of r inside the loop, the condition guarding the
 * rollback and the return are not visible in this view.
 */
2233 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2238 struct kvm_vcpu *vcpu;
2240 kvm_for_each_vcpu(i, vcpu, kvm) {
2241 mutex_lock(&vcpu->mutex);
2242 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2243 mutex_unlock(&vcpu->mutex);
2248 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
/*
 * Execute one protected-virtualization command described by *cmd.
 * cmd->data is used as a user pointer to the command payload; cmd->rc and
 * cmd->rrc receive the codes reported by the helpers.
 *
 * Commands visible here:
 *  - KVM_PV_ENABLE: tests kvm_s390_pv_is_protected(), switches the SCA
 *    with sca_switch_to_extended(), calls gmap_mark_unmergeable() under the
 *    mmap write lock of current->mm, then kvm_s390_pv_init_vm() and
 *    kvm_s390_cpus_to_pv(); a kvm_s390_pv_deinit_vm() call with dummy
 *    rc/rrc follows (presumably the rollback). IRQ_PEND_EXT_SERVICE is set
 *    in kvm->arch.float_int.masked_irqs.
 *  - KVM_PV_DISABLE: kvm_s390_cpus_from_pv(), then
 *    kvm_s390_pv_deinit_vm(); the IRQ_PEND_EXT_SERVICE mask bit is cleared.
 *  - KVM_PV_SET_SEC_PARMS: copies a struct kvm_s390_pv_sec_parm from user
 *    space, tests parms.length against PAGE_SIZE * 2, copies the header
 *    from parms.origin into a vmalloc() buffer and passes it to
 *    kvm_s390_pv_set_sec_parms().
 *  - KVM_PV_UNPACK: tests kvm_s390_pv_is_protected() and
 *    mm_is_protected(kvm->mm), copies a struct kvm_s390_pv_unp from user
 *    space and calls kvm_s390_pv_unpack().
 *  - KVM_PV_VERIFY, KVM_PV_PREP_RESET, KVM_PV_UNSHARE_ALL: issue
 *    UVC_CMD_VERIFY_IMG, UVC_CMD_PREPARE_RESET and UVC_CMD_SET_UNSHARE_ALL
 *    through uv_cmd_nodata() and trace rc and rrc.
 *
 * NOTE(review): the switch statement itself, every error code, the break
 * statements, the freeing of hdr, the default case and the return are not
 * visible in this view.
 */
2252 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2256 void __user *argp = (void __user *)cmd->data;
2259 case KVM_PV_ENABLE: {
2261 if (kvm_s390_pv_is_protected(kvm))
2265 * FMT 4 SIE needs esca. As we never switch back to bsca from
2266 * esca, we need no cleanup in the error cases below
2268 r = sca_switch_to_extended(kvm);
2272 mmap_write_lock(current->mm);
2273 r = gmap_mark_unmergeable();
2274 mmap_write_unlock(current->mm);
2278 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2282 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2284 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2286 /* we need to block service interrupts from now on */
2287 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2290 case KVM_PV_DISABLE: {
2292 if (!kvm_s390_pv_is_protected(kvm))
2295 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2297 * If a CPU could not be destroyed, destroy VM will also fail.
2298 * There is no point in trying to destroy it. Instead return
2299 * the rc and rrc from the first CPU that failed destroying.
2303 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2305 /* no need to block service interrupts any more */
2306 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2309 case KVM_PV_SET_SEC_PARMS: {
2310 struct kvm_s390_pv_sec_parm parms = {};
2314 if (!kvm_s390_pv_is_protected(kvm))
2318 if (copy_from_user(&parms, argp, sizeof(parms)))
2321 /* Currently restricted to 8KB */
2323 if (parms.length > PAGE_SIZE * 2)
2327 hdr = vmalloc(parms.length);
2332 if (!copy_from_user(hdr, (void __user *)parms.origin,
2334 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2335 &cmd->rc, &cmd->rrc);
2340 case KVM_PV_UNPACK: {
2341 struct kvm_s390_pv_unp unp = {};
2344 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2348 if (copy_from_user(&unp, argp, sizeof(unp)))
2351 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2352 &cmd->rc, &cmd->rrc);
2355 case KVM_PV_VERIFY: {
2357 if (!kvm_s390_pv_is_protected(kvm))
2360 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2361 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2362 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2366 case KVM_PV_PREP_RESET: {
2368 if (!kvm_s390_pv_is_protected(kvm))
2371 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2372 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2373 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2377 case KVM_PV_UNSHARE_ALL: {
2379 if (!kvm_s390_pv_is_protected(kvm))
2382 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2383 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2384 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
/*
 * Dispatcher for the s390-specific VM ioctls.
 *
 * The struct kvm is taken from filp->private_data and 'arg' is used as a
 * user-space pointer. Cases visible here: KVM_S390_INTERRUPT,
 * KVM_CREATE_IRQCHIP, KVM_SET/GET/HAS_DEVICE_ATTR, KVM_S390_GET/SET_SKEYS,
 * KVM_S390_GET/SET_CMMA_BITS and KVM_S390_PV_COMMAND. Most of them copy an
 * argument structure from user space and hand it to a helper.
 *
 * The CMMA helpers are called with kvm->slots_lock held; the GET variant
 * copies the args back to user space. KVM_S390_PV_COMMAND first sets
 * user_cpu_state_ctrl, checks is_prot_virt_host(), runs kvm_s390_handle_pv()
 * under kvm->lock and copies the args back to user space.
 *
 * NOTE(review): this listing omits short lines (braces, error-code
 * assignments, break/return statements, the default case), so the exact
 * error values returned are not visible here.
 */
2394 long kvm_arch_vm_ioctl(struct file *filp,
2395 unsigned int ioctl, unsigned long arg)
2397 struct kvm *kvm = filp->private_data;
2398 void __user *argp = (void __user *)arg;
2399 struct kvm_device_attr attr;
2403 case KVM_S390_INTERRUPT: {
2404 struct kvm_s390_interrupt s390int;
2407 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2409 r = kvm_s390_inject_vm(kvm, &s390int);
2412 case KVM_CREATE_IRQCHIP: {
2413 struct kvm_irq_routing_entry routing;
2416 if (kvm->arch.use_irqchip) {
2417 /* Set up dummy routing. */
2418 memset(&routing, 0, sizeof(routing));
2419 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2423 case KVM_SET_DEVICE_ATTR: {
2425 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2427 r = kvm_s390_vm_set_attr(kvm, &attr);
2430 case KVM_GET_DEVICE_ATTR: {
2432 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2434 r = kvm_s390_vm_get_attr(kvm, &attr);
2437 case KVM_HAS_DEVICE_ATTR: {
2439 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2441 r = kvm_s390_vm_has_attr(kvm, &attr);
2444 case KVM_S390_GET_SKEYS: {
2445 struct kvm_s390_skeys args;
2448 if (copy_from_user(&args, argp,
2449 sizeof(struct kvm_s390_skeys)))
2451 r = kvm_s390_get_skeys(kvm, &args);
2454 case KVM_S390_SET_SKEYS: {
2455 struct kvm_s390_skeys args;
2458 if (copy_from_user(&args, argp,
2459 sizeof(struct kvm_s390_skeys)))
2461 r = kvm_s390_set_skeys(kvm, &args);
2464 case KVM_S390_GET_CMMA_BITS: {
2465 struct kvm_s390_cmma_log args;
2468 if (copy_from_user(&args, argp, sizeof(args)))
/* the CMMA helper runs with slots_lock held */
2470 mutex_lock(&kvm->slots_lock);
2471 r = kvm_s390_get_cmma_bits(kvm, &args);
2472 mutex_unlock(&kvm->slots_lock);
2474 r = copy_to_user(argp, &args, sizeof(args));
2480 case KVM_S390_SET_CMMA_BITS: {
2481 struct kvm_s390_cmma_log args;
2484 if (copy_from_user(&args, argp, sizeof(args)))
2486 mutex_lock(&kvm->slots_lock);
2487 r = kvm_s390_set_cmma_bits(kvm, &args);
2488 mutex_unlock(&kvm->slots_lock);
2491 case KVM_S390_PV_COMMAND: {
2492 struct kvm_pv_cmd args;
2494 /* protvirt means user sigp */
2495 kvm->arch.user_cpu_state_ctrl = 1;
2497 if (!is_prot_virt_host()) {
2501 if (copy_from_user(&args, argp, sizeof(args))) {
/* the PV command itself is handled with kvm->lock held */
2509 mutex_lock(&kvm->lock);
2510 r = kvm_s390_handle_pv(kvm, &args);
2511 mutex_unlock(&kvm->lock);
2512 if (copy_to_user(argp, &args, sizeof(args))) {
/*
 * Queries the AP configuration: when ap_instructions_available() is true,
 * ap_qci() is asked to fill a struct ap_config_info, and a return of 0 is
 * treated as a successful query.
 * NOTE(review): the return statements are elided from this listing; the
 * caller kvm_s390_set_crycb_format() treats a non-zero result as
 * "APXA installed".
 */
2525 static int kvm_s390_apxa_installed(void)
2527 struct ap_config_info info;
2529 if (ap_instructions_available()) {
2530 if (ap_qci(&info) == 0)
2538 * The format of the crypto control block (CRYCB) is specified in the 3 low
2539 * order bits of the CRYCB designation (CRYCBD) field as follows:
2540 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2541 * AP extended addressing (APXA) facility are installed.
2542 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2543 * Format 2: Both the APXA and MSAX3 facilities are installed
/*
 * Builds kvm->arch.crypto.crycbd: the CRYCB address truncated to 32 bits
 * with the format bits cleared (format 0), then CRYCB_FORMAT2 or
 * CRYCB_FORMAT1 ORed in depending on kvm_s390_apxa_installed().
 * NOTE(review): the statement guarded by the facility-76 test (presumably an
 * early return) and the 'else' between the two format assignments are elided
 * from this listing - confirm against the full source.
 */
2545 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2547 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2549 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2550 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2552 /* Check whether MSAX3 is installed */
2553 if (!test_kvm_facility(kvm, 76))
2556 if (kvm_s390_apxa_installed())
2557 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2559 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
/*
 * Copies the caller's apm, aqm and adm bitmaps into the VM's CRYCB.
 *
 * Runs with kvm->lock held and all vcpus blocked. Which area is written
 * depends on the format bits in crycbd: CRYCB_FORMAT2 writes 32 bytes
 * (256 bits) per mask into APCB1; the CRYCB_FORMAT0 label (whose comment says
 * another format falls through to it) writes APCB0 - 8 bytes of apm and
 * 2 bytes each of aqm and adm. Each path logs the new masks via VM_EVENT.
 * Afterwards KVM_REQ_VSIE_RESTART is broadcast so that every vcpu recreates
 * its shadow crycb. Exported with EXPORT_SYMBOL_GPL.
 * NOTE(review): the case label preceding CRYCB_FORMAT0 and the break
 * statements are elided from this listing.
 */
2562 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2563 unsigned long *aqm, unsigned long *adm)
2565 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2567 mutex_lock(&kvm->lock);
2568 kvm_s390_vcpu_block_all(kvm);
2570 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2571 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2572 memcpy(crycb->apcb1.apm, apm, 32);
2573 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2574 apm[0], apm[1], apm[2], apm[3]);
2575 memcpy(crycb->apcb1.aqm, aqm, 32);
2576 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2577 aqm[0], aqm[1], aqm[2], aqm[3]);
2578 memcpy(crycb->apcb1.adm, adm, 32);
2579 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2580 adm[0], adm[1], adm[2], adm[3]);
2583 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2584 memcpy(crycb->apcb0.apm, apm, 8);
2585 memcpy(crycb->apcb0.aqm, aqm, 2);
2586 memcpy(crycb->apcb0.adm, adm, 2);
2587 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2588 apm[0], *((unsigned short *)aqm),
2589 *((unsigned short *)adm));
2591 default: /* Can not happen */
2595 /* recreate the shadow crycb for each vcpu */
2596 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2597 kvm_s390_vcpu_unblock_all(kvm);
2598 mutex_unlock(&kvm->lock);
2600 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
/*
 * Zeroes both the APCB0 and the APCB1 area of the VM's CRYCB.
 *
 * Done with kvm->lock held and all vcpus blocked; afterwards
 * KVM_REQ_VSIE_RESTART is broadcast so that every vcpu recreates its shadow
 * crycb. Exported with EXPORT_SYMBOL_GPL.
 */
2602 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2604 mutex_lock(&kvm->lock);
2605 kvm_s390_vcpu_block_all(kvm);
2607 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2608 sizeof(kvm->arch.crypto.crycb->apcb0));
2609 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2610 sizeof(kvm->arch.crypto.crycb->apcb1));
2612 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2613 /* recreate the shadow crycb for each vcpu */
2614 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2615 kvm_s390_vcpu_unblock_all(kvm);
2616 mutex_unlock(&kvm->lock);
2618 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
/*
 * Returns a cpuid, reinterpreted as a u64, whose version field is forced
 * to 0xff.
 * NOTE(review): the declaration and initialisation of 'cpuid' are elided
 * from this listing.
 */
2620 static u64 kvm_s390_get_initial_cpuid(void)
2625 cpuid.version = 0xff;
2626 return *((u64 *) &cpuid);
/*
 * Initialises the VM's crypto state: points crypto.crycb at the CRYCB inside
 * sie_page2, sets the CRYCB format, enables AES and DEA key wrapping and
 * fills both wrapping key masks with random bytes.
 * NOTE(review): the statement guarded by the facility-76 test is elided from
 * this listing (presumably an early return that skips the key-wrapping setup).
 */
2629 static void kvm_s390_crypto_init(struct kvm *kvm)
2631 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2632 kvm_s390_set_crycb_format(kvm);
2634 if (!test_kvm_facility(kvm, 76))
2637 /* Enable AES/DEA protected key functions by default */
2638 kvm->arch.crypto.aes_kw = 1;
2639 kvm->arch.crypto.dea_kw = 1;
2640 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2641 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2642 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2643 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
/*
 * Frees the VM's SCA and clears kvm->arch.sca. An extended SCA is released
 * with free_pages_exact() using sizeof(struct esca_block); free_page() is
 * the other path.
 * NOTE(review): the 'else' separating the two frees is elided here.
 */
2646 static void sca_dispose(struct kvm *kvm)
2648 if (kvm->arch.use_esca)
2649 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2651 free_page((unsigned long)(kvm->arch.sca));
2652 kvm->arch.sca = NULL;
/*
 * Architecture-specific initialisation of a newly created VM.
 *
 * Steps visible in this listing, in order:
 *  - with CONFIG_KVM_S390_UCONTROL, reject type bits other than
 *    KVM_VM_S390_UCONTROL and require CAP_SYS_ADMIN for ucontrol VMs;
 *  - enable SIE via s390_enable_sie();
 *  - allocate a zeroed page for the basic SCA (GFP_DMA is added when
 *    sclp.has_64bscao is not set) and, under kvm_lock, place the SCA at the
 *    static 'sca_offset' inside that page;
 *  - register a debug feature named "kvm-<pid>" and allocate sie_page2;
 *  - derive the facility mask and list from the host's stfle_fac_list and
 *    the kvm_s390_fac_base/ext tables, then force facilities 138 and 74 on
 *    (147 and 65 conditionally);
 *  - set the initial cpuid and ibc, initialise crypto, floating interrupt
 *    state, locks and wait queues;
 *  - for non-ucontrol VMs create the gmap, limited by mem_limit;
 *  - initialise vsie and gisa.
 *
 * The visible error path frees sie_page2 and unregisters the debug feature.
 * NOTE(review): short lines (error assignments, gotos, labels, else
 * branches, the update of sca_offset, the final return) are elided from this
 * listing, so the exact control flow must be confirmed against the full file.
 */
2655 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2657 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2659 char debug_name[16];
2660 static unsigned long sca_offset;
2663 #ifdef CONFIG_KVM_S390_UCONTROL
2664 if (type & ~KVM_VM_S390_UCONTROL)
2666 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2673 rc = s390_enable_sie();
2679 if (!sclp.has_64bscao)
2680 alloc_flags |= GFP_DMA;
2681 rwlock_init(&kvm->arch.sca_lock);
2682 /* start with basic SCA */
2683 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2686 mutex_lock(&kvm_lock);
2688 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2690 kvm->arch.sca = (struct bsca_block *)
2691 ((char *) kvm->arch.sca + sca_offset);
2692 mutex_unlock(&kvm_lock);
2694 sprintf(debug_name, "kvm-%u", current->pid);
2696 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2700 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2701 kvm->arch.sie_page2 =
2702 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2703 if (!kvm->arch.sie_page2)
2706 kvm->arch.sie_page2->kvm = kvm;
2707 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2709 for (i = 0; i < kvm_s390_fac_size(); i++) {
2710 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2711 (kvm_s390_fac_base[i] |
2712 kvm_s390_fac_ext[i]);
2713 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2714 kvm_s390_fac_base[i];
2716 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2718 /* we are always in czam mode - even on pre z14 machines */
2719 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2720 set_kvm_facility(kvm->arch.model.fac_list, 138);
2721 /* we emulate STHYI in kvm */
2722 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2723 set_kvm_facility(kvm->arch.model.fac_list, 74);
2724 if (MACHINE_HAS_TLB_GUEST) {
2725 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2726 set_kvm_facility(kvm->arch.model.fac_list, 147);
2729 if (css_general_characteristics.aiv && test_facility(65))
2730 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2732 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2733 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2735 kvm_s390_crypto_init(kvm);
2737 mutex_init(&kvm->arch.float_int.ais_lock);
2738 spin_lock_init(&kvm->arch.float_int.lock);
2739 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2740 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2741 init_waitqueue_head(&kvm->arch.ipte_wq);
2742 mutex_init(&kvm->arch.ipte_mutex);
2744 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2745 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2747 if (type & KVM_VM_S390_UCONTROL) {
2748 kvm->arch.gmap = NULL;
2749 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2751 if (sclp.hamax == U64_MAX)
2752 kvm->arch.mem_limit = TASK_SIZE_MAX;
2754 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2756 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2757 if (!kvm->arch.gmap)
2759 kvm->arch.gmap->private = kvm;
2760 kvm->arch.gmap->pfault_enabled = 0;
2763 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2764 kvm->arch.use_skf = sclp.has_skey;
2765 spin_lock_init(&kvm->arch.start_stop_lock);
2766 kvm_s390_vsie_init(kvm);
2768 kvm_s390_gisa_init(kvm);
2769 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2773 free_page((unsigned long)kvm->arch.sie_page2);
2774 debug_unregister(kvm->arch.dbf);
2776 KVM_EVENT(3, "creation of vm failed: %d", rc);
/*
 * Tears down a vcpu: logs and traces the event, clears its local interrupts
 * and async pf completion queue, removes the per-vcpu gmap for ucontrol VMs,
 * undoes the CMMA setup when the VM uses CMMA, destroys the protected-virt
 * CPU when it has a handle, and finally frees the page holding the SIE block.
 * NOTE(review): the statement guarded by '!kvm_is_ucontrol()' and the
 * declarations of rc/rrc are elided from this listing.
 */
2780 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2784 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2785 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2786 kvm_s390_clear_local_irqs(vcpu);
2787 kvm_clear_async_pf_completion_queue(vcpu);
2788 if (!kvm_is_ucontrol(vcpu->kvm))
2791 if (kvm_is_ucontrol(vcpu->kvm))
2792 gmap_remove(vcpu->arch.gmap);
2794 if (vcpu->kvm->arch.use_cmma)
2795 kvm_s390_vcpu_unsetup_cmma(vcpu);
2796 /* We can not hold the vcpu mutex here, we are already dying */
2797 if (kvm_s390_pv_cpu_get_handle(vcpu))
2798 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2799 free_page((unsigned long)(vcpu->arch.sie_block));
/*
 * Destroys every vcpu of the VM and then, with kvm->lock held, clears the
 * kvm->vcpus[] slots of all online vcpus and resets online_vcpus to 0.
 */
2802 static void kvm_free_vcpus(struct kvm *kvm)
2805 struct kvm_vcpu *vcpu;
2807 kvm_for_each_vcpu(i, vcpu, kvm)
2808 kvm_vcpu_destroy(vcpu);
2810 mutex_lock(&kvm->lock);
2811 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2812 kvm->vcpus[i] = NULL;
2814 atomic_set(&kvm->online_vcpus, 0);
2815 mutex_unlock(&kvm->lock);
/*
 * Releases the architecture-specific state of a VM. Visible order: vcpus,
 * gisa, the protected-virt VM (only when a PV handle exists), the debug
 * feature, sie_page2, the gmap (non-ucontrol VMs only), adapters, floating
 * interrupts and vsie state.
 * NOTE(review): lines elided from this listing (e.g. declarations of rc/rrc
 * and any other short calls) are not covered by this summary.
 */
2818 void kvm_arch_destroy_vm(struct kvm *kvm)
2822 kvm_free_vcpus(kvm);
2824 kvm_s390_gisa_destroy(kvm);
2826 * We are already at the end of life and kvm->lock is not taken.
2827 * This is ok as the file descriptor is closed by now and nobody
2828 * can mess with the pv state. To avoid lockdep_assert_held from
2829 * complaining we do not use kvm_s390_pv_is_protected.
2831 if (kvm_s390_pv_get_handle(kvm))
2832 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2833 debug_unregister(kvm->arch.dbf);
2834 free_page((unsigned long)kvm->arch.sie_page2);
2835 if (!kvm_is_ucontrol(kvm))
2836 gmap_remove(kvm->arch.gmap);
2837 kvm_s390_destroy_adapters(kvm);
2838 kvm_s390_clear_float_irqs(kvm);
2839 kvm_s390_vsie_destroy(kvm);
2840 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2843 /* Section: vcpu related */
/*
 * Creates a private gmap for this vcpu on current->mm with limit -1UL and
 * stores the owning VM in gmap->private.
 * NOTE(review): the return statements (including the one for a failed
 * gmap_create()) are elided from this listing.
 */
2844 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2846 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2847 if (!vcpu->arch.gmap)
2849 vcpu->arch.gmap->private = vcpu->kvm;
/*
 * Removes a vcpu from the SCA: with the sca read lock held, clears the
 * vcpu's bit in mcn and zeroes its sda entry, using the extended or the
 * basic SCA layout depending on use_esca.
 * NOTE(review): the statement guarded by '!kvm_s390_use_sca_entries()'
 * (presumably an early return) and the 'else' line are elided here.
 */
2854 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2856 if (!kvm_s390_use_sca_entries())
2858 read_lock(&vcpu->kvm->arch.sca_lock);
2859 if (vcpu->kvm->arch.use_esca) {
2860 struct esca_block *sca = vcpu->kvm->arch.sca;
2862 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863 sca->cpu[vcpu->vcpu_id].sda = 0;
2865 struct bsca_block *sca = vcpu->kvm->arch.sca;
2867 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2868 sca->cpu[vcpu->vcpu_id].sda = 0;
2870 read_unlock(&vcpu->kvm->arch.sca_lock);
/*
 * Registers a vcpu in the SCA and stores the SCA origin in its SIE block
 * (scaoh = upper 32 bits of the address, scaol = lower 32 bits).
 *
 * When SCA entries are not used, only the origin is stored. Otherwise, with
 * the sca read lock held, the vcpu's sda entry is pointed at the SIE block
 * and its mcn bit is set. For the extended SCA the low 6 bits of scaol are
 * masked off and ECB2_ESCA is set.
 * NOTE(review): the early return after the no-SCA-entries branch and the
 * 'else' line are elided from this listing.
 */
2873 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2875 if (!kvm_s390_use_sca_entries()) {
2876 struct bsca_block *sca = vcpu->kvm->arch.sca;
2878 /* we still need the basic sca for the ipte control */
2879 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2880 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2883 read_lock(&vcpu->kvm->arch.sca_lock);
2884 if (vcpu->kvm->arch.use_esca) {
2885 struct esca_block *sca = vcpu->kvm->arch.sca;
2887 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2888 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2889 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2890 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2891 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2893 struct bsca_block *sca = vcpu->kvm->arch.sca;
2895 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2896 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2897 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2898 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2900 read_unlock(&vcpu->kvm->arch.sca_lock);
2903 /* Basic SCA to Extended SCA data copy routines */
/*
 * Copies the c and scn fields of sigp_ctrl from basic entry 's' to extended
 * entry 'd'.
 * NOTE(review): any further field copies on short lines are elided here.
 */
2904 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2907 d->sigp_ctrl.c = s->sigp_ctrl.c;
2908 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
/*
 * Copies ipte_control and the first KVM_S390_BSCA_CPU_SLOTS cpu entries from
 * basic SCA 's' into extended SCA 'd'.
 * NOTE(review): other short assignments may be elided from this listing.
 */
2911 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2915 d->ipte_control = s->ipte_control;
2917 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2918 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
/*
 * Replaces the VM's basic SCA with a newly allocated, zeroed extended SCA.
 *
 * With all vcpus blocked and the sca write lock held, the basic SCA contents
 * are copied over, every vcpu's SIE block is pointed at the new SCA
 * (scaoh/scaol, ECB2_ESCA) and kvm->arch.sca / use_esca are updated. The old
 * SCA page is freed once the lock is dropped and the vcpus are unblocked.
 * The visible caller, sca_can_add_vcpu(), invokes this with kvm->lock held.
 * NOTE(review): the return statements, including those for the
 * already-extended and allocation-failure cases, are elided from this listing.
 */
2921 static int sca_switch_to_extended(struct kvm *kvm)
2923 struct bsca_block *old_sca = kvm->arch.sca;
2924 struct esca_block *new_sca;
2925 struct kvm_vcpu *vcpu;
2926 unsigned int vcpu_idx;
2929 if (kvm->arch.use_esca)
2932 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2936 scaoh = (u32)((u64)(new_sca) >> 32);
2937 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2939 kvm_s390_vcpu_block_all(kvm);
2940 write_lock(&kvm->arch.sca_lock);
2942 sca_copy_b_to_e(new_sca, old_sca);
2944 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2945 vcpu->arch.sie_block->scaoh = scaoh;
2946 vcpu->arch.sie_block->scaol = scaol;
2947 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2949 kvm->arch.sca = new_sca;
2950 kvm->arch.use_esca = 1;
2952 write_unlock(&kvm->arch.sca_lock);
2953 kvm_s390_vcpu_unblock_all(kvm);
2955 free_page((unsigned long)old_sca);
2957 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2958 old_sca, kvm->arch.sca);
/*
 * Decides whether a vcpu with the given id can be added.
 *
 * Without SCA entries the id is compared against KVM_MAX_VCPUS. Otherwise
 * ids below KVM_S390_BSCA_CPU_SLOTS are checked first; larger ids require
 * both sclp.has_esca and sclp.has_64bscao and, under kvm->lock, a switch to
 * the extended SCA if it is not in use yet. The final result is true only if
 * that switch succeeded and id < KVM_S390_ESCA_CPU_SLOTS.
 * NOTE(review): the return statements of the early checks are elided from
 * this listing.
 */
2962 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2966 if (!kvm_s390_use_sca_entries()) {
2967 if (id < KVM_MAX_VCPUS)
2971 if (id < KVM_S390_BSCA_CPU_SLOTS)
2973 if (!sclp.has_esca || !sclp.has_64bscao)
2976 mutex_lock(&kvm->lock);
2977 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2978 mutex_unlock(&kvm->lock);
2980 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2983 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Starts cpu timer accounting: stores the current TOD clock in cputm_start
 * inside a cputm_seqcount write section. Warns once if cputm_start was
 * already non-zero, i.e. accounting had been started before.
 */
2984 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2986 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2987 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2988 vcpu->arch.cputm_start = get_tod_clock_fast();
2989 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2992 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Stops cpu timer accounting: inside a cputm_seqcount write section the TOD
 * time elapsed since cputm_start is subtracted from the SIE block's cputm
 * and cputm_start is reset to 0. Warns once if accounting was not running.
 */
2993 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2995 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2996 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2997 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2998 vcpu->arch.cputm_start = 0;
2999 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3002 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Marks cpu timer accounting as enabled for this vcpu and starts it
 * immediately. Warns once if it was already enabled.
 */
3003 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3005 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3006 vcpu->arch.cputm_enabled = true;
3007 __start_cpu_timer_accounting(vcpu);
3010 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Stops cpu timer accounting for this vcpu and then marks it as disabled.
 * Warns once if it was not enabled.
 */
3011 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3013 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3014 __stop_cpu_timer_accounting(vcpu);
3015 vcpu->arch.cputm_enabled = false;
/*
 * Wrapper that disables preemption before calling
 * __enable_cpu_timer_accounting().
 * NOTE(review): the matching preempt_enable() is elided from this listing.
 */
3018 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3020 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3021 __enable_cpu_timer_accounting(vcpu);
/*
 * Wrapper that disables preemption before calling
 * __disable_cpu_timer_accounting().
 * NOTE(review): the matching preempt_enable() is elided from this listing.
 */
3025 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3027 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3028 __disable_cpu_timer_accounting(vcpu);
3032 /* set the cpu timer - may only be called from the VCPU thread itself */
/*
 * Stores a new cpu timer value in the SIE block. This happens with
 * preemption disabled and inside a cputm_seqcount write section; when
 * accounting is enabled, cputm_start is restarted at the current TOD clock
 * so the new value is accounted from now on.
 * NOTE(review): the matching preempt_enable() is elided from this listing.
 */
3033 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3035 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3036 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3037 if (vcpu->arch.cputm_enabled)
3038 vcpu->arch.cputm_start = get_tod_clock_fast();
3039 vcpu->arch.sie_block->cputm = cputm;
3040 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3044 /* update and get the cpu timer - can also be called from other VCPU threads */
/*
 * Returns the vcpu's current cpu timer value.
 *
 * When accounting is disabled, the SIE block's cputm is returned unchanged.
 * Otherwise the value is sampled in a seqcount retry loop and, if
 * cputm_start is non-zero, reduced by the TOD time elapsed since
 * cputm_start. The retry check uses 'seq & ~1', so a sample taken while a
 * writer was active (odd sequence) is always retried.
 * NOTE(review): the 'do {' opener, local declarations, preempt_enable() and
 * the final return are elided from this listing.
 */
3045 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3050 if (unlikely(!vcpu->arch.cputm_enabled))
3051 return vcpu->arch.sie_block->cputm;
3053 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3055 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3057 * If the writer would ever execute a read in the critical
3058 * section, e.g. in irq context, we have a deadlock.
3060 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3061 value = vcpu->arch.sie_block->cputm;
3062 /* if cputm_start is 0, accounting is being started/stopped */
3063 if (likely(vcpu->arch.cputm_start))
3064 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3065 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
/*
 * Called when the vcpu is loaded onto a host cpu: enables the gmap saved in
 * enabled_gmap, sets CPUSTAT_RUNNING and starts cpu timer accounting when it
 * is enabled and the vcpu is not idle.
 * NOTE(review): short lines (e.g. any store of 'cpu') are elided here.
 */
3070 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3073 gmap_enable(vcpu->arch.enabled_gmap);
3074 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3075 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3076 __start_cpu_timer_accounting(vcpu);
/*
 * Counterpart of kvm_arch_vcpu_load(): stops cpu timer accounting (when
 * enabled and the vcpu is not idle), clears CPUSTAT_RUNNING, remembers the
 * currently enabled gmap in enabled_gmap and disables it.
 * NOTE(review): short lines are elided from this listing.
 */
3080 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3083 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3084 __stop_cpu_timer_accounting(vcpu);
3085 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3086 vcpu->arch.enabled_gmap = gmap_get_enabled();
3087 gmap_disable(vcpu->arch.enabled_gmap);
/*
 * Finishes vcpu creation: copies the VM's epoch and epdx into the SIE block
 * under kvm->lock, shares the VM gmap with the vcpu for non-ucontrol VMs,
 * sets ICTL_OPEREXC when facility 74 is available or user_instr0 is set, and
 * primes enabled_gmap so that the first vcpu_load enables the right gmap.
 * NOTE(review): short lines inside the non-ucontrol branch (such as an
 * sca_add_vcpu() call) and preempt_disable/enable calls, if present, are
 * elided from this listing.
 */
3091 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3093 mutex_lock(&vcpu->kvm->lock);
3095 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3096 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3098 mutex_unlock(&vcpu->kvm->lock);
3099 if (!kvm_is_ucontrol(vcpu->kvm)) {
3100 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3103 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3104 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3105 /* make vcpu_load load the right gmap on the first trigger */
3106 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
/*
 * Tests, via test_bit_inv(), whether bit 'nr' is set in both the VM's pckmo
 * subfunction mask and the host's kvm_s390_available_subfunc.pckmo mask.
 * NOTE(review): the return statements are elided from this listing.
 */
3109 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3111 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3112 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
/*
 * Returns true when at least one of the pckmo subfunctions 32, 33, 34, 40
 * or 41 is available for this VM (see kvm_has_pckmo_subfunc()).
 */
3117 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3119 /* At least one ECC subfunction must be present */
3120 return kvm_has_pckmo_subfunc(kvm, 32) ||
3121 kvm_has_pckmo_subfunc(kvm, 33) ||
3122 kvm_has_pckmo_subfunc(kvm, 34) ||
3123 kvm_has_pckmo_subfunc(kvm, 40) ||
3124 kvm_has_pckmo_subfunc(kvm, 41);
/*
 * Mirrors the VM's crypto configuration into the vcpu's SIE block.
 *
 * crycbd is copied from the VM, then the AES/DEA bits in ecb3, ECA_APIE and
 * ECD_ECC are cleared and set again according to the VM state: ECA_APIE when
 * crypto.apie is set, ECB3_AES when aes_kw is set (plus ECD_ECC when a pckmo
 * ECC subfunction is available) and ECB3_DEA when dea_kw is set.
 * NOTE(review): the statement guarded by the first test (no apie and no
 * facility 76) is elided here; per the inline comment there is nothing to
 * set up in that case.
 */
3128 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3131 * If the AP instructions are not being interpreted and the MSAX3
3132 * facility is not configured for the guest, there is nothing to set up.
3134 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3137 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3138 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3139 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3140 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3142 if (vcpu->kvm->arch.crypto.apie)
3143 vcpu->arch.sie_block->eca |= ECA_APIE;
3145 /* Set up protected key support */
3146 if (vcpu->kvm->arch.crypto.aes_kw) {
3147 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3148 /* ecc is also wrapped with AES key */
3149 if (kvm_has_pckmo_ecc(vcpu->kvm))
3150 vcpu->arch.sie_block->ecd |= ECD_ECC;
3153 if (vcpu->kvm->arch.crypto.dea_kw)
3154 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
/* Frees the page referenced by the SIE block's cbrlo field and zeroes it. */
3157 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3159 free_page(vcpu->arch.sie_block->cbrlo);
3160 vcpu->arch.sie_block->cbrlo = 0;
/*
 * Allocates a zeroed page and stores its address in the SIE block's cbrlo
 * field.
 * NOTE(review): the return statements (allocation failure and success) are
 * elided from this listing.
 */
3163 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3165 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3166 if (!vcpu->arch.sie_block->cbrlo)
/*
 * Applies the VM's cpu model to the vcpu: copies ibc into the SIE block and,
 * when facility 7 is available, stores the address of the model's facility
 * list, truncated to 32 bits, in sie_block->fac.
 */
3171 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3173 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3175 vcpu->arch.sie_block->ibc = model->ibc;
3176 if (test_kvm_facility(vcpu->kvm, 7))
3177 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/*
 * Programs the execution controls of a new vcpu's SIE block.
 *
 * Visible steps: initialise cpuflags (CPUSTAT_ZARCH plus elided flags), pick
 * CPUSTAT_GED2 or CPUSTAT_GED from facilities 78/8, apply the cpu model, set
 * ecb/ecb2/eca/ecd bits depending on machine features and VM facilities
 * (9, 73, 8 with use_pfmfi, 130, 129, 139, 156), enable ECA_AIV when a gisa
 * designation (gd) is present, publish the addresses of the sdnx and riccb
 * sync-register areas, set up CMMA if the VM uses it, initialise the ckc
 * hrtimer, set hpid and the crypto controls, and - with kvm->lock held -
 * create the protected-virt CPU when the VM is protected. A call to
 * kvm_s390_vcpu_unsetup_cmma() follows the PV cpu creation.
 *
 * NOTE(review): several conditions (e.g. the ones guarding ECA_CEI, ECA_IB,
 * ECA_SII, CPUSTAT_KSS versus the ICTL_ISKE/SSKE/RRBE fallback, and the
 * error checks after the CMMA setup and PV cpu creation) are on short lines
 * elided from this listing - confirm against the full source.
 */
3180 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3185 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3189 if (test_kvm_facility(vcpu->kvm, 78))
3190 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3191 else if (test_kvm_facility(vcpu->kvm, 8))
3192 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3194 kvm_s390_vcpu_setup_model(vcpu);
3196 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3197 if (MACHINE_HAS_ESOP)
3198 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3199 if (test_kvm_facility(vcpu->kvm, 9))
3200 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3201 if (test_kvm_facility(vcpu->kvm, 73))
3202 vcpu->arch.sie_block->ecb |= ECB_TE;
3204 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3205 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3206 if (test_kvm_facility(vcpu->kvm, 130))
3207 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3208 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3210 vcpu->arch.sie_block->eca |= ECA_CEI;
3212 vcpu->arch.sie_block->eca |= ECA_IB;
3214 vcpu->arch.sie_block->eca |= ECA_SII;
3215 if (sclp.has_sigpif)
3216 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3217 if (test_kvm_facility(vcpu->kvm, 129)) {
3218 vcpu->arch.sie_block->eca |= ECA_VX;
3219 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3221 if (test_kvm_facility(vcpu->kvm, 139))
3222 vcpu->arch.sie_block->ecd |= ECD_MEF;
3223 if (test_kvm_facility(vcpu->kvm, 156))
3224 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3225 if (vcpu->arch.sie_block->gd) {
3226 vcpu->arch.sie_block->eca |= ECA_AIV;
3227 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3228 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3230 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3232 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3235 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3237 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3239 if (vcpu->kvm->arch.use_cmma) {
3240 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3244 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3245 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3247 vcpu->arch.sie_block->hpid = HPID_KVM;
3249 kvm_s390_vcpu_crypto_setup(vcpu);
3251 mutex_lock(&vcpu->kvm->lock);
3252 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3253 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3255 kvm_s390_vcpu_unsetup_cmma(vcpu);
3257 mutex_unlock(&vcpu->kvm->lock);
/*
 * Pre-creation check: for non-ucontrol VMs the vcpu id must be acceptable to
 * sca_can_add_vcpu().
 * NOTE(review): the return statements are elided from this listing.
 */
3262 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3264 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
/*
 * Architecture-specific vcpu creation.
 *
 * Allocates one zeroed page as struct sie_page and points the vcpu's SIE
 * block and itdba at it; sets mso = 0 and msl = sclp.hamax; stores the vcpu
 * id in icpua; derives the gisa designation (gd) from the VM's gisa origin,
 * adding GISA_FORMAT1 when sclp.has_gisaf is set; initialises the local
 * interrupt lock, the cputm seqcount, the pfault token and the async pf
 * completion queue; builds kvm_valid_regs from the available facilities
 * (64, 82, 133, 156, and VRS or FPRS); creates a per-vcpu gmap for ucontrol
 * VMs; and finally runs kvm_s390_vcpu_setup().
 *
 * Error path visible here: out_ucontrol_uninit removes the per-vcpu gmap for
 * ucontrol VMs, after which the SIE page is freed.
 * NOTE(review): error checks, the out_free_sie_block label, the condition
 * choosing between KVM_SYNC_VRS and KVM_SYNC_FPRS, and the return statements
 * are on short lines elided from this listing.
 */
3269 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3271 struct sie_page *sie_page;
3274 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3275 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3279 vcpu->arch.sie_block = &sie_page->sie_block;
3280 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3282 /* the real guest size will always be smaller than msl */
3283 vcpu->arch.sie_block->mso = 0;
3284 vcpu->arch.sie_block->msl = sclp.hamax;
3286 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3287 spin_lock_init(&vcpu->arch.local_int.lock);
3288 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3289 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3290 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3291 seqcount_init(&vcpu->arch.cputm_seqcount);
3293 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3294 kvm_clear_async_pf_completion_queue(vcpu);
3295 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3302 kvm_s390_set_prefix(vcpu, 0);
3303 if (test_kvm_facility(vcpu->kvm, 64))
3304 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3305 if (test_kvm_facility(vcpu->kvm, 82))
3306 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3307 if (test_kvm_facility(vcpu->kvm, 133))
3308 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3309 if (test_kvm_facility(vcpu->kvm, 156))
3310 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3311 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3312 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3315 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3317 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3319 if (kvm_is_ucontrol(vcpu->kvm)) {
3320 rc = __kvm_ucontrol_vcpu_init(vcpu);
3322 goto out_free_sie_block;
3325 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3326 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3327 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3329 rc = kvm_s390_vcpu_setup(vcpu);
3331 goto out_ucontrol_uninit;
3334 out_ucontrol_uninit:
3335 if (kvm_is_ucontrol(vcpu->kvm))
3336 gmap_remove(vcpu->arch.gmap);
3338 free_page((unsigned long)(vcpu->arch.sie_block));
/* Returns the result of kvm_s390_vcpu_has_irq(vcpu, 0) for this vcpu. */
3342 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3344 return kvm_s390_vcpu_has_irq(vcpu, 0);
/* True when PSW_MASK_PSTATE is not set in the guest PSW mask. */
3347 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3349 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
/*
 * Atomically sets PROG_BLOCK_SIE in the SIE block's prog20 field.
 * NOTE(review): a following short line (e.g. a call that kicks the vcpu out
 * of SIE) may be elided from this listing.
 */
3352 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3354 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Atomically clears PROG_BLOCK_SIE in the SIE block's prog20 field. */
3358 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3360 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/*
 * Atomically sets PROG_REQUEST in the SIE block's prog20 field.
 * NOTE(review): a following short line (e.g. a call that kicks the vcpu out
 * of SIE) may be elided from this listing.
 */
3363 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3365 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/* True when PROG_BLOCK_SIE or PROG_REQUEST is set in prog20. */
3369 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3371 return atomic_read(&vcpu->arch.sie_block->prog20) &
3372 (PROG_BLOCK_SIE | PROG_REQUEST);
/* Atomically clears PROG_REQUEST in the SIE block's prog20 field. */
3375 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3377 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3381 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3382 * If the CPU is not running (e.g. waiting as idle) the function will
3383 * return immediately. */
/*
 * Sets CPUSTAT_STOP_INT, kicks a possibly running vsie and then loops while
 * PROG_IN_SIE is set in the SIE block's prog0c field.
 * NOTE(review): the loop body is elided from this listing.
 */
3384 void exit_sie(struct kvm_vcpu *vcpu)
3386 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3387 kvm_s390_vsie_kick(vcpu);
3388 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3392 /* Kick a guest cpu out of SIE to process a request synchronously */
/*
 * Queues request 'req' on the vcpu with kvm_make_request() and then sets
 * PROG_REQUEST through kvm_s390_vcpu_request().
 */
3393 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3395 kvm_make_request(req, vcpu);
3396 kvm_s390_vcpu_request(vcpu);
/*
 * gmap invalidation callback. gmap->private is the owning struct kvm.
 *
 * Shadow gmaps and ranges starting at or above 1UL << 31 are tested first;
 * per the inline comment only prefix pages are of interest. For every vcpu
 * whose two prefix pages overlap [start, end], a synchronous
 * KVM_REQ_MMU_RELOAD request is issued.
 * NOTE(review): the second parameter line and the early returns for the two
 * initial tests are elided from this listing.
 */
3399 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3402 struct kvm *kvm = gmap->private;
3403 struct kvm_vcpu *vcpu;
3404 unsigned long prefix;
3407 if (gmap_is_shadow(gmap))
3409 if (start >= 1UL << 31)
3410 /* We are only interested in prefix pages */
3412 kvm_for_each_vcpu(i, vcpu, kvm) {
3413 /* match against both prefix pages */
3414 prefix = kvm_s390_get_prefix(vcpu);
3415 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3416 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3418 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/*
 * Compares the host's steal time, computed as
 * avg_steal_timer * 100 / (TICK_USEC << 12), against halt_poll_max_steal;
 * when the threshold is reached the halt_no_poll_steal statistic is bumped.
 * NOTE(review): the return statements are elided from this listing.
 */
3423 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3425 /* do not poll with more than halt_poll_max_steal percent of steal time */
3426 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3427 halt_poll_max_steal) {
3428 vcpu->stat.halt_no_poll_steal++;
/*
 * Hook referenced by common KVM code; per the inline comment it is never
 * actually called on s390.
 * NOTE(review): the function body is elided from this listing.
 */
3434 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3436 /* kvm common code refers to this, but never calls it */
/*
 * Copies the current value of a single register to the user address in
 * reg->addr with put_user().
 *
 * Visible registers: TODPR (u32), EPOCHDIFF, CLOCK_COMP, PP and GBEA from
 * the SIE block, CPU_TIMER via kvm_s390_get_cpu_timer(), and the pfault
 * token/compare/select values from vcpu->arch (all u64).
 * NOTE(review): the switch statement, break statements, default case and the
 * return are elided from this listing.
 */
3441 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3442 struct kvm_one_reg *reg)
3447 case KVM_REG_S390_TODPR:
3448 r = put_user(vcpu->arch.sie_block->todpr,
3449 (u32 __user *)reg->addr);
3451 case KVM_REG_S390_EPOCHDIFF:
3452 r = put_user(vcpu->arch.sie_block->epoch,
3453 (u64 __user *)reg->addr);
3455 case KVM_REG_S390_CPU_TIMER:
3456 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3457 (u64 __user *)reg->addr);
3459 case KVM_REG_S390_CLOCK_COMP:
3460 r = put_user(vcpu->arch.sie_block->ckc,
3461 (u64 __user *)reg->addr);
3463 case KVM_REG_S390_PFTOKEN:
3464 r = put_user(vcpu->arch.pfault_token,
3465 (u64 __user *)reg->addr);
3467 case KVM_REG_S390_PFCOMPARE:
3468 r = put_user(vcpu->arch.pfault_compare,
3469 (u64 __user *)reg->addr);
3471 case KVM_REG_S390_PFSELECT:
3472 r = put_user(vcpu->arch.pfault_select,
3473 (u64 __user *)reg->addr);
3475 case KVM_REG_S390_PP:
3476 r = put_user(vcpu->arch.sie_block->pp,
3477 (u64 __user *)reg->addr);
3479 case KVM_REG_S390_GBEA:
3480 r = put_user(vcpu->arch.sie_block->gbea,
3481 (u64 __user *)reg->addr);
/*
 * Sets a single register from the user address in reg->addr with get_user().
 *
 * Most registers are read straight into their SIE block or vcpu->arch field.
 * CPU_TIMER is read into a temporary and applied through
 * kvm_s390_set_cpu_timer(). For PFTOKEN the async pf completion queue is
 * cleared when the stored token equals KVM_S390_PFAULT_TOKEN_INVALID.
 * NOTE(review): the switch statement, break statements, default case, the
 * check of 'r' before kvm_s390_set_cpu_timer() and the return are elided
 * from this listing.
 */
3490 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3491 struct kvm_one_reg *reg)
3497 case KVM_REG_S390_TODPR:
3498 r = get_user(vcpu->arch.sie_block->todpr,
3499 (u32 __user *)reg->addr);
3501 case KVM_REG_S390_EPOCHDIFF:
3502 r = get_user(vcpu->arch.sie_block->epoch,
3503 (u64 __user *)reg->addr);
3505 case KVM_REG_S390_CPU_TIMER:
3506 r = get_user(val, (u64 __user *)reg->addr);
3508 kvm_s390_set_cpu_timer(vcpu, val);
3510 case KVM_REG_S390_CLOCK_COMP:
3511 r = get_user(vcpu->arch.sie_block->ckc,
3512 (u64 __user *)reg->addr);
3514 case KVM_REG_S390_PFTOKEN:
3515 r = get_user(vcpu->arch.pfault_token,
3516 (u64 __user *)reg->addr);
3517 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3518 kvm_clear_async_pf_completion_queue(vcpu);
3520 case KVM_REG_S390_PFCOMPARE:
3521 r = get_user(vcpu->arch.pfault_compare,
3522 (u64 __user *)reg->addr);
3524 case KVM_REG_S390_PFSELECT:
3525 r = get_user(vcpu->arch.pfault_select,
3526 (u64 __user *)reg->addr);
3528 case KVM_REG_S390_PP:
3529 r = get_user(vcpu->arch.sie_block->pp,
3530 (u64 __user *)reg->addr);
3532 case KVM_REG_S390_GBEA:
3533 r = get_user(vcpu->arch.sie_block->gbea,
3534 (u64 __user *)reg->addr);
/*
 * Normal cpu reset: clears PSW_MASK_RI in the guest PSW, invalidates the
 * pfault token, zeroes the riccb sync area, clears the async pf completion
 * queue, stops the vcpu unless user space controls the cpu state, and clears
 * the local interrupts.
 */
3543 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3545 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3546 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3547 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3549 kvm_clear_async_pf_completion_queue(vcpu);
3550 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3551 kvm_s390_vcpu_stop(vcpu);
3552 kvm_s390_clear_local_irqs(vcpu);
/*
 * Initial cpu reset: performs the normal reset and then resets both the SIE
 * block state and the mirrored sync registers in vcpu->run.
 *
 * SIE block: PSW mask/address, prefix, cpu timer and clock comparator are
 * zeroed, and the control registers are cleared except CR0 and CR14, which
 * get CR0_INITIAL_MASK and CR14_INITIAL_MASK. The sync-register copies are
 * reset the same way, with gbea set to 1 and fpc, pp, todpr, cputm zeroed.
 * For vcpus that are not protected, gbea, pp, todpr and FPF_BPBC in the SIE
 * block are reset as well.
 */
3555 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3557 /* Initial reset is a superset of the normal reset */
3558 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3561 * This equals initial cpu reset in pop, but we don't switch to ESA.
3562 * We do not only reset the internal data, but also ...
3564 vcpu->arch.sie_block->gpsw.mask = 0;
3565 vcpu->arch.sie_block->gpsw.addr = 0;
3566 kvm_s390_set_prefix(vcpu, 0);
3567 kvm_s390_set_cpu_timer(vcpu, 0);
3568 vcpu->arch.sie_block->ckc = 0;
3569 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3570 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3571 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3573 /* ... the data in sync regs */
3574 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3575 vcpu->run->s.regs.ckc = 0;
3576 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3577 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3578 vcpu->run->psw_addr = 0;
3579 vcpu->run->psw_mask = 0;
3580 vcpu->run->s.regs.todpr = 0;
3581 vcpu->run->s.regs.cputm = 0;
3582 vcpu->run->s.regs.ckc = 0;
3583 vcpu->run->s.regs.pp = 0;
3584 vcpu->run->s.regs.gbea = 1;
3585 vcpu->run->s.regs.fpc = 0;
3587 * Do not reset these registers in the protected case, as some of
3588 * them are overlayed and they are not accessible in this case
3591 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3592 vcpu->arch.sie_block->gbea = 1;
3593 vcpu->arch.sie_block->pp = 0;
3594 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3595 vcpu->arch.sie_block->todpr = 0;
/*
 * Clear reset: performs the initial reset and additionally zeroes the gprs,
 * vrs, acrs and gscb register images in the kvm_run sync area and resets
 * etoken_extension.
 */
3599 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3601 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3603 /* Clear reset is a superset of the initial reset */
3604 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3606 memset(&regs->gprs, 0, sizeof(regs->gprs));
3607 memset(&regs->vrs, 0, sizeof(regs->vrs));
3608 memset(&regs->acrs, 0, sizeof(regs->acrs));
3609 memset(&regs->gscb, 0, sizeof(regs->gscb));
3612 regs->etoken_extension = 0;
/*
 * KVM_SET_REGS backend: copies the general purpose registers supplied in
 * @regs into the gprs image of the kvm_run sync area.
 */
3615 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3618 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * KVM_GET_REGS backend: copies the gprs image of the kvm_run sync area into
 * the caller-provided @regs.
 */
3623 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3626 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/*
 * Set the special registers from @sregs: the access registers go into the
 * kvm_run sync area, the control registers directly into the SIE block.
 */
3631 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3632 struct kvm_sregs *sregs)
3636 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3637 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Read the special registers into @sregs: the access registers come from
 * the kvm_run sync area, the control registers from the SIE block.
 */
3643 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3644 struct kvm_sregs *sregs)
3648 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3649 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Set the guest floating point state from @fpu. The fpc value is checked
 * with test_fp_ctl() before it is stored into the kvm_run sync area. The
 * floating point registers are either converted into the vector register
 * image (convert_fp_to_vx) or copied verbatim into the fprs image.
 * NOTE(review): the condition selecting between the two paths and the
 * handling of a failed fpc check should be confirmed against the full
 * function body.
 */
3655 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3661 if (test_fp_ctl(fpu->fpc)) {
3665 vcpu->run->s.regs.fpc = fpu->fpc;
3667 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3668 (freg_t *) fpu->fprs);
3670 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/*
 * Read the guest floating point state into @fpu. The floating point
 * registers are either converted out of the vector register image
 * (convert_vx_to_fp) or copied from the fprs image; fpc is taken from the
 * kvm_run sync area.
 * NOTE(review): confirm which condition selects conversion vs. plain copy.
 */
3677 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3681 /* make sure we have the latest values */
3684 convert_vx_to_fp((freg_t *) fpu->fprs,
3685 (__vector128 *) vcpu->run->s.regs.vrs);
3687 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3688 fpu->fpc = vcpu->run->s.regs.fpc;
/*
 * Store @psw as the PSW in kvm_run (psw_mask/psw_addr). The update is gated
 * by an is_vcpu_stopped() check.
 * NOTE(review): presumably the request is rejected while the vcpu is not
 * stopped - confirm the error path.
 */
3694 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3698 if (!is_vcpu_stopped(vcpu))
3701 vcpu->run->psw_mask = psw.mask;
3702 vcpu->run->psw_addr = psw.addr;
/* Address translation is not available on s390: always fails with -EINVAL. */
3707 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3708 struct kvm_translation *tr)
3710 return -EINVAL; /* not implemented yet */
/*
 * Guest debug control bits accepted by kvm_arch_vcpu_ioctl_set_guest_debug();
 * any other bit set in dbg->control is tested for there.
 */
3713 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3714 KVM_GUESTDBG_USE_HW_BP | \
3715 KVM_GUESTDBG_ENABLE)
/*
 * Configure guest debugging for a vcpu. Any previous debug state is dropped
 * first (guest_debug = 0, breakpoint data cleared). The request is then
 * checked for unknown control bits and for sclp.has_gpere. With
 * KVM_GUESTDBG_ENABLE the control word is stored, CPUSTAT_P is set to
 * enforce guest PER and, if requested, hardware breakpoints are imported;
 * otherwise CPUSTAT_P is cleared and last_bp reset. The final three
 * statements undo the debug setup again.
 * NOTE(review): presumably that last reset only runs when the breakpoint
 * import failed - confirm against the full body.
 */
3717 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3718 struct kvm_guest_debug *dbg)
3724 vcpu->guest_debug = 0;
3725 kvm_s390_clear_bp_data(vcpu);
3727 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3731 if (!sclp.has_gpere) {
3736 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3737 vcpu->guest_debug = dbg->control;
3738 /* enforce guest PER */
3739 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3741 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3742 rc = kvm_s390_import_bp_data(vcpu, dbg);
3744 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3745 vcpu->arch.guestdbg.last_bp = 0;
3749 vcpu->guest_debug = 0;
3750 kvm_s390_clear_bp_data(vcpu);
3751 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
/*
 * Report the vcpu state: KVM_MP_STATE_STOPPED when is_vcpu_stopped() holds,
 * KVM_MP_STATE_OPERATING otherwise.
 */
3759 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3760 struct kvm_mp_state *mp_state)
3766 /* CHECK_STOP and LOAD are not supported yet */
3767 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3768 KVM_MP_STATE_OPERATING;
/*
 * Set the vcpu state on behalf of userspace. Calling this switches the VM
 * to user controlled cpu state (user_cpu_state_ctrl = 1). STOPPED and
 * OPERATING map to kvm_s390_vcpu_stop()/kvm_s390_vcpu_start(); LOAD is
 * forwarded to kvm_s390_pv_set_cpu_state(PV_CPU_STATE_OPR_LOAD) and is
 * guarded by a protected-vcpu check.
 * NOTE(review): the comment on the CHECK_STOP case still says LOAD is
 * unsupported although a LOAD case exists above it - comment looks stale.
 */
3774 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3775 struct kvm_mp_state *mp_state)
3781 /* user space knows about this interface - let it control the state */
3782 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3784 switch (mp_state->mp_state) {
3785 case KVM_MP_STATE_STOPPED:
3786 rc = kvm_s390_vcpu_stop(vcpu);
3788 case KVM_MP_STATE_OPERATING:
3789 rc = kvm_s390_vcpu_start(vcpu);
3791 case KVM_MP_STATE_LOAD:
3792 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3796 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3798 case KVM_MP_STATE_CHECK_STOP:
3799 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
/* Tell whether the CPUSTAT_IBS cpuflag is currently set for this vcpu. */
3808 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3810 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
/*
 * Process the requests pending for a vcpu before it (re-)enters SIE:
 *  - KVM_REQ_MMU_RELOAD: re-arm the notifier on the two prefix pages via
 *    gmap_mprotect_notify(); the request is raised again from this branch
 *    (presumably when the re-arm failed - confirm)
 *  - KVM_REQ_TLB_FLUSH: set ihcpu in the SIE block to 0xffff
 *  - KVM_REQ_ENABLE_IBS / KVM_REQ_DISABLE_IBS: toggle CPUSTAT_IBS if the
 *    current state differs, with a trace event
 *  - KVM_REQ_ICPT_OPEREXC: set ICTL_OPEREXC in ictl
 *  - KVM_REQ_START_MIGRATION / KVM_REQ_STOP_MIGRATION: clear ECB2_CMMA, or
 *    set it again when the VM uses CMMA and the mm has used CMM
 *  - KVM_REQ_UNHALT / KVM_REQ_VSIE_RESTART: only cleared
 */
3813 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3816 kvm_s390_vcpu_request_handled(vcpu);
3817 if (!kvm_request_pending(vcpu))
3820 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3821 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3822 * This ensures that the ipte instruction for this request has
3823 * already finished. We might race against a second unmapper that
3824 * wants to set the blocking bit. Lets just retry the request loop.
3826 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3828 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3829 kvm_s390_get_prefix(vcpu),
3830 PAGE_SIZE * 2, PROT_WRITE);
3832 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3838 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3839 vcpu->arch.sie_block->ihcpu = 0xffff;
3843 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3844 if (!ibs_enabled(vcpu)) {
3845 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3846 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3851 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3852 if (ibs_enabled(vcpu)) {
3853 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3854 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3859 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3860 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3864 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3866 * Disable CMM virtualization; we will emulate the ESSA
3867 * instruction manually, in order to provide additional
3868 * functionalities needed for live migration.
3870 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3874 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3876 * Re-enable CMM virtualization if CMMA is available and
3877 * CMM has been used.
3879 if ((vcpu->kvm->arch.use_cmma) &&
3880 (vcpu->kvm->mm->context.uses_cmm))
3881 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3885 /* nothing to do, just clear the request */
3886 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3887 /* we left the vsie handler, nothing to do, just clear the request */
3888 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
/*
 * Set the guest TOD clock to the value described by @gtod. Under kvm->lock
 * the host clock is sampled with store_tod_clock_ext() and the VM epoch is
 * computed as the difference between the requested guest TOD and the host
 * TOD. With facility 139 the epoch index difference is kept as well. The
 * new epoch/epdx are then copied into every vcpu's SIE block while all
 * vcpus are blocked.
 */
3893 void kvm_s390_set_tod_clock(struct kvm *kvm,
3894 const struct kvm_s390_vm_tod_clock *gtod)
3896 struct kvm_vcpu *vcpu;
3897 union tod_clock clk;
3900 mutex_lock(&kvm->lock);
3903 store_tod_clock_ext(&clk);
3905 kvm->arch.epoch = gtod->tod - clk.tod;
3907 if (test_kvm_facility(kvm, 139)) {
3908 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
/* presumably detects a wrap of the subtraction above and borrows from epdx - confirm */
3909 if (kvm->arch.epoch > gtod->tod)
3910 kvm->arch.epdx -= 1;
3913 kvm_s390_vcpu_block_all(kvm);
3914 kvm_for_each_vcpu(i, vcpu, kvm) {
3915 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3916 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3919 kvm_s390_vcpu_unblock_all(kvm);
3921 mutex_unlock(&kvm->lock);
/*
 * Thin wrapper around gmap_fault() on the vcpu's gmap: a non-zero @writable
 * is translated into FAULT_FLAG_WRITE, zero into no fault flags. The
 * gmap_fault() result is returned unchanged.
 */
3925 * kvm_arch_fault_in_page - fault-in guest page if necessary
3926 * @vcpu: The corresponding virtual cpu
3927 * @gpa: Guest physical address
3928 * @writable: Whether the page should be writable or not
3930 * Make sure that a guest page has been faulted-in on the host.
3932 * Return: Zero on success, negative error code otherwise.
3934 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3936 return gmap_fault(vcpu->arch.gmap, gpa,
3937 writable ? FAULT_FLAG_WRITE : 0);
/*
 * Inject a pfault notification carrying @token. Two forms exist: a
 * KVM_S390_INT_PFAULT_INIT interrupt injected into the vcpu itself, and a
 * KVM_S390_INT_PFAULT_DONE interrupt injected into the VM. A failing
 * injection only triggers a one-time warning.
 * NOTE(review): @start_token presumably selects INIT (true) vs. DONE
 * (false), matching how the callers in this file pass it - confirm.
 */
3940 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3941 unsigned long token)
3943 struct kvm_s390_interrupt inti;
3944 struct kvm_s390_irq irq;
3947 irq.u.ext.ext_params2 = token;
3948 irq.type = KVM_S390_INT_PFAULT_INIT;
3949 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3951 inti.type = KVM_S390_INT_PFAULT_DONE;
3952 inti.parm64 = token;
3953 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/*
 * Async-pf hook for a page that is not yet present: emits the pfault_init
 * trace event and injects the token stored in @work with start_token set.
 */
3957 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3958 struct kvm_async_pf *work)
3960 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3961 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/*
 * Async-pf hook for a page that became present: emits the pfault_done trace
 * event and injects the token stored in @work with start_token cleared.
 */
3966 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3967 struct kvm_async_pf *work)
3969 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3970 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/* Async-pf "page ready" hook; carries no statements on s390, see the note inside. */
3973 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3974 struct kvm_async_pf *work)
3976 /* s390 will always inject the page directly */
/*
 * Async-pf hook asking whether completed work may be dequeued for @vcpu.
 * NOTE(review): per the note inside, completion handling is still wanted for
 * cleanup; confirm the value actually returned.
 */
3979 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3982 * s390 will always inject the page directly,
3983 * but we still want check_async_completion to cleanup
/*
 * Try to set up an asynchronous page fault for the address in
 * current->thread.gmap_addr. A chain of preconditions is tested first:
 * a valid pfault token, the guest PSW mask matching pfault_compare under
 * pfault_select, external interrupts not disabled in the PSW, no pending
 * interrupt for the vcpu, CR0_SERVICE_SIGNAL_SUBMASK set in CR0 and pfault
 * enabled on the gmap. Then the host virtual address of the faulting guest
 * address is computed, 8 bytes of token are read from guest real memory at
 * vcpu->arch.pfault_token, and the work is queued via kvm_setup_async_pf().
 * NOTE(review): each failed precondition presumably makes the function
 * report that no async pf was set up - confirm the return values.
 */
3988 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3991 struct kvm_arch_async_pf arch;
3993 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3995 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3996 vcpu->arch.pfault_compare)
3998 if (psw_extint_disabled(vcpu))
4000 if (kvm_s390_vcpu_has_irq(vcpu, 0))
4002 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4004 if (!vcpu->arch.gmap->pfault_enabled)
4007 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4008 hva += current->thread.gmap_addr & ~PAGE_MASK;
4009 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4012 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/*
 * Work done right before entering SIE: run the async-pf completion
 * housekeeping, copy gprs 14 and 15 from kvm_run into gg14/gg15 of the SIE
 * block, deliver pending interrupts (skipped for ucontrol VMs), handle
 * pending requests, and - with guest debugging enabled - back up and patch
 * the guest PER registers. Finally the vcpu's bit in the GISA kicked_mask
 * is cleared, icptcode is reset to 0 and the current cpuflags are logged
 * and traced.
 */
4015 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4020 * On s390 notifications for arriving pages will be delivered directly
4021 * to the guest but the house keeping for completed pfaults is
4022 * handled outside the worker.
4024 kvm_check_async_pf_completion(vcpu);
4026 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4027 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4032 if (!kvm_is_ucontrol(vcpu->kvm)) {
4033 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4038 rc = kvm_s390_handle_requests(vcpu);
4042 if (guestdbg_enabled(vcpu)) {
4043 kvm_s390_backup_guest_per_regs(vcpu);
4044 kvm_s390_patch_guest_per_regs(vcpu);
4047 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4049 vcpu->arch.sie_block->icptcode = 0;
4050 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4051 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4052 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/*
 * Handle a fault that hit while executing the SIE instruction by injecting
 * a program interrupt into the guest. The default is PGM_ADDRESSING. One
 * opcode byte is read at the guest PSW address to derive the instruction
 * length with insn_length(); the PSW is forwarded by that length and the
 * length is passed along in pgm_info.flags together with
 * KVM_S390_PGM_FLAGS_ILC_VALID. pgm_info may instead be taken from
 * vcpu->arch.pgm.
 * NOTE(review): presumably vcpu->arch.pgm is used when the opcode read
 * itself raised a guest exception - confirm the conditions around rc.
 */
4057 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4059 struct kvm_s390_pgm_info pgm_info = {
4060 .code = PGM_ADDRESSING,
4065 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4066 trace_kvm_s390_sie_fault(vcpu);
4069 * We want to inject an addressing exception, which is defined as a
4070 * suppressing or terminating exception. However, since we came here
4071 * by a DAT access exception, the PSW still points to the faulting
4072 * instruction since DAT exceptions are nullifying. So we've got
4073 * to look up the current opcode to get the length of the instruction
4074 * to be able to forward the PSW.
4076 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4077 ilen = insn_length(opcode);
4081 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4082 * Forward by arbitrary ilc, injection will take care of
4083 * nullification if necessary.
4085 pgm_info = vcpu->arch.pgm;
4088 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4089 kvm_s390_forward_psw(vcpu, ilen);
4090 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/*
 * Work done right after leaving SIE. The intercept code is logged and
 * traced, guest PER registers are restored when debugging, and gg14/gg15
 * are copied back into gprs 14/15 of kvm_run. @exit_reason is then
 * evaluated:
 *  - -EINTR: a machine check; the mcck_info stored in the sie_page is
 *    reinjected
 *  - icptcode > 0: kvm_handle_sie_intercept() is called; for -EOPNOTSUPP
 *    the intercept is handed to userspace as KVM_EXIT_S390_SIEIC with
 *    icptcode, ipa and ipb filled in
 *  - anything but -EFAULT: counted as exit_null
 *  - ucontrol VM: reported as KVM_EXIT_S390_UCONTROL with the faulting
 *    address and pgm_code 0x10
 *  - gmap_pfault set: try an async pfault, otherwise count pfault_sync and
 *    fault the page in synchronously for write
 *  - otherwise: treated as a fault in the SIE instruction itself
 */
4093 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4095 struct mcck_volatile_info *mcck_info;
4096 struct sie_page *sie_page;
4098 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4099 vcpu->arch.sie_block->icptcode);
4100 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4102 if (guestdbg_enabled(vcpu))
4103 kvm_s390_restore_guest_per_regs(vcpu);
4105 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4106 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4108 if (exit_reason == -EINTR) {
4109 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4110 sie_page = container_of(vcpu->arch.sie_block,
4111 struct sie_page, sie_block);
4112 mcck_info = &sie_page->mcck_info;
4113 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4117 if (vcpu->arch.sie_block->icptcode > 0) {
4118 int rc = kvm_handle_sie_intercept(vcpu);
4120 if (rc != -EOPNOTSUPP)
4122 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4123 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4124 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4125 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4127 } else if (exit_reason != -EFAULT) {
4128 vcpu->stat.exit_null++;
4130 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4131 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4132 vcpu->run->s390_ucontrol.trans_exc_code =
4133 current->thread.gmap_addr;
4134 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4136 } else if (current->thread.gmap_pfault) {
4137 trace_kvm_s390_major_guest_pfault(vcpu);
4138 current->thread.gmap_pfault = 0;
4139 if (kvm_arch_setup_async_pf(vcpu))
4141 vcpu->stat.pfault_sync++;
4142 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4144 return vcpu_post_run_fault_in_sie(vcpu);
/*
 * PSW_INT_MASK collects the PSW bits for external, I/O and machine check
 * interrupts; it is used to fence interrupt injection for protected vcpus.
 *
 * __vcpu_run() is the inner run loop of a vcpu: vcpu_pre_run(), enter the
 * guest through sie64a(), vcpu_post_run(), repeated until a signal is
 * pending, a guest debug exit is pending or a step returned non-zero.
 * kvm->srcu is read-locked for the loop and released around the guest
 * entry itself. For protected vcpus the gprs are copied into
 * sie_page->pv_grregs before entry and back afterwards, and for the
 * ICPT_PV_INSTR / ICPT_PV_PREF intercepts the interrupt bits are cleared in
 * the guest PSW mask.
 */
4147 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4148 static int __vcpu_run(struct kvm_vcpu *vcpu)
4150 int rc, exit_reason;
4151 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4154 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4155 * ning the guest), so that memslots (and other stuff) are protected
4157 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4160 rc = vcpu_pre_run(vcpu);
4164 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4166 * As PF_VCPU will be used in fault handler, between
4167 * guest_enter and guest_exit should be no uaccess.
4169 local_irq_disable();
4170 guest_enter_irqoff();
4171 __disable_cpu_timer_accounting(vcpu);
4173 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4174 memcpy(sie_page->pv_grregs,
4175 vcpu->run->s.regs.gprs,
4176 sizeof(sie_page->pv_grregs));
4178 if (test_cpu_flag(CIF_FPU))
4180 exit_reason = sie64a(vcpu->arch.sie_block,
4181 vcpu->run->s.regs.gprs);
4182 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4183 memcpy(vcpu->run->s.regs.gprs,
4184 sie_page->pv_grregs,
4185 sizeof(sie_page->pv_grregs));
4187 * We're not allowed to inject interrupts on intercepts
4188 * that leave the guest state in an "in-between" state
4189 * where the next SIE entry will do a continuation.
4190 * Fence interrupts in our "internal" PSW.
4192 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4193 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4194 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4197 local_irq_disable();
4198 __enable_cpu_timer_accounting(vcpu);
4199 guest_exit_irqoff();
4201 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4203 rc = vcpu_post_run(vcpu, exit_reason);
4204 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4206 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * Load the register state that is only synced for non-protected vcpus from
 * kvm_run into the vcpu before running it. The guest PSW is always taken
 * from kvm_run; the remaining groups are only copied when their dirty bit
 * is set in kvm_dirty_regs:
 *  - KVM_SYNC_ARCH0: todpr, pp, gbea
 *  - KVM_SYNC_PFAULT: pfault token/select/compare; an invalid token also
 *    flushes the async-pf completion queue
 *  - KVM_SYNC_DIAG318: diag318 info and the cpnc field of the SIE block
 *  - KVM_SYNC_RICCB (facility 64): sets ECB3_RI if not yet set
 *  - KVM_SYNC_GSCB (facility 133): sets ECB_GS and ECD_HOSTREGMGMT and
 *    marks gs_enabled
 *  - KVM_SYNC_BPBC (facility 82): mirrors regs.bpbc into FPF_BPBC
 * On machines with guarded storage the host gs control block is saved and
 * the guest one from kvm_run is made current when gs_enabled is set.
 * NOTE(review): the RI and GS conditions each have a further operand that
 * should be confirmed against the full source.
 */
4210 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4212 struct kvm_run *kvm_run = vcpu->run;
4213 struct runtime_instr_cb *riccb;
4216 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4217 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4218 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4219 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4220 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4221 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4222 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4223 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4225 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4226 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4227 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4228 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4229 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4230 kvm_clear_async_pf_completion_queue(vcpu);
4232 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4233 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4234 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4237 * If userspace sets the riccb (e.g. after migration) to a valid state,
4238 * we should enable RI here instead of doing the lazy enablement.
4240 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4241 test_kvm_facility(vcpu->kvm, 64) &&
4243 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4244 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4245 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4248 * If userspace sets the gscb (e.g. after migration) to non-zero,
4249 * we should enable GS here instead of doing the lazy enablement.
4251 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4252 test_kvm_facility(vcpu->kvm, 133) &&
4254 !vcpu->arch.gs_enabled) {
4255 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4256 vcpu->arch.sie_block->ecb |= ECB_GS;
4257 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4258 vcpu->arch.gs_enabled = 1;
4260 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4261 test_kvm_facility(vcpu->kvm, 82)) {
4262 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4263 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4265 if (MACHINE_HAS_GS) {
4267 __ctl_set_bit(2, 4);
4268 if (current->thread.gs_cb) {
4269 vcpu->arch.host_gscb = current->thread.gs_cb;
4270 save_gs_cb(vcpu->arch.host_gscb);
4272 if (vcpu->arch.gs_enabled) {
4273 current->thread.gs_cb = (struct gs_cb *)
4274 &vcpu->run->s.regs.gscb;
4275 restore_gs_cb(current->thread.gs_cb);
4279 /* SIE will load etoken directly from SDNX and therefore kvm_run */
/*
 * Load register state from kvm_run into the vcpu before running it. Dirty
 * groups handled here: KVM_SYNC_PREFIX (prefix), KVM_SYNC_CRS (control
 * registers, followed by a TLB flush request) and KVM_SYNC_ARCH0 (CPU timer
 * and clock comparator). The host access registers are saved and replaced
 * by the guest ones, the host fpc/regs pointer of current->thread.fpu is
 * saved and pointed at the guest image in kvm_run, and an fpc rejected by
 * test_fp_ctl() is replaced by 0. Non-protected vcpus additionally go
 * through sync_regs_fmt2(); for protected vcpus only the condition code of
 * the userspace PSW mask is accepted. kvm_dirty_regs is cleared at the end.
 */
4282 static void sync_regs(struct kvm_vcpu *vcpu)
4284 struct kvm_run *kvm_run = vcpu->run;
4286 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4287 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4288 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
/* NOTE(review): 128 is presumably sizeof(gcr), i.e. all control registers - confirm */
4289 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4290 /* some control register changes require a tlb flush */
4291 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4293 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4294 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4295 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4297 save_access_regs(vcpu->arch.host_acrs);
4298 restore_access_regs(vcpu->run->s.regs.acrs);
4299 /* save host (userspace) fprs/vrs */
4301 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4302 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4304 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4306 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4307 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4308 if (test_fp_ctl(current->thread.fpu.fpc))
4309 /* User space provided an invalid FPC, let's clear it */
4310 current->thread.fpu.fpc = 0;
4312 /* Sync fmt2 only data */
4313 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4314 sync_regs_fmt2(vcpu);
4317 * In several places we have to modify our internal view to
4318 * not do things that are disallowed by the ultravisor. For
4319 * example we must not inject interrupts after specific exits
4320 * (e.g. 112 prefix page not secure). We do this by turning
4321 * off the machine check, external and I/O interrupt bits
4322 * of our PSW copy. To avoid getting validity intercepts, we
4323 * do only accept the condition code from userspace.
4325 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4326 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4330 kvm_run->kvm_dirty_regs = 0;
/*
 * Write the register state that is only synced for non-protected vcpus back
 * into kvm_run after a run: todpr, pp, gbea, the BPBC bit (as 0/1) and the
 * diag318 value. On machines with guarded storage the guest gs control
 * block is saved when gs_enabled is set, the host control block is made
 * current again, control register bit (2, 4) is cleared when the host had
 * none, and host_gscb is reset to NULL.
 */
4333 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4335 struct kvm_run *kvm_run = vcpu->run;
4337 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4338 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4339 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4340 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4341 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4342 if (MACHINE_HAS_GS) {
4344 __ctl_set_bit(2, 4);
4345 if (vcpu->arch.gs_enabled)
4346 save_gs_cb(current->thread.gs_cb);
4347 current->thread.gs_cb = vcpu->arch.host_gscb;
4348 restore_gs_cb(vcpu->arch.host_gscb);
4349 if (!vcpu->arch.host_gscb)
4350 __ctl_clear_bit(2, 4);
4351 vcpu->arch.host_gscb = NULL;
4354 /* SIE will save etoken directly into SDNX and therefore kvm_run */
/*
 * Write the vcpu register state back into kvm_run after a run: guest PSW,
 * prefix, control registers, CPU timer, clock comparator and the pfault
 * token/select/compare values. The guest access registers are saved into
 * kvm_run and the host ones restored; the guest fpc is stored and the host
 * fpc/regs pointer put back into current->thread.fpu. Non-protected vcpus
 * additionally go through store_regs_fmt2().
 */
4357 static void store_regs(struct kvm_vcpu *vcpu)
4359 struct kvm_run *kvm_run = vcpu->run;
4361 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4362 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4363 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4364 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4365 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4366 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4367 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4368 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4369 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4370 save_access_regs(vcpu->run->s.regs.acrs);
4371 restore_access_regs(vcpu->arch.host_acrs);
4372 /* Save guest register state */
4374 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4375 /* Restore will be done lazily at return */
4376 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4377 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4378 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4379 store_regs_fmt2(vcpu);
/*
 * KVM_RUN backend. Before running, the request is screened: immediate_exit,
 * unknown bits in kvm_valid_regs/kvm_dirty_regs, and an already pending
 * guest debug exit (which is prepared for userspace right away). The vcpu
 * signal mask is activated; without user cpu state control the vcpu is
 * started implicitly, with it a stopped vcpu is refused with a rate limited
 * error message. CPU timer accounting is enabled around __vcpu_run().
 * Afterwards a pending signal yields KVM_EXIT_INTR, a pending debug exit is
 * prepared, and -EREMOTE marks an exit that kvm_run already describes. The
 * exit_userspace counter is bumped on the way out.
 * NOTE(review): the return codes of the early exits should be confirmed
 * against the full function body.
 */
4382 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4384 struct kvm_run *kvm_run = vcpu->run;
4387 if (kvm_run->immediate_exit)
4390 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4391 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4396 if (guestdbg_exit_pending(vcpu)) {
4397 kvm_s390_prepare_debug_exit(vcpu);
4402 kvm_sigset_activate(vcpu);
4405 * no need to check the return value of vcpu_start as it can only have
4406 * an error for protvirt, but protvirt means user cpu state
4408 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4409 kvm_s390_vcpu_start(vcpu);
4410 } else if (is_vcpu_stopped(vcpu)) {
4411 pr_err_ratelimited("can't run stopped vcpu %d\n",
4418 enable_cpu_timer_accounting(vcpu);
4421 rc = __vcpu_run(vcpu);
4423 if (signal_pending(current) && !rc) {
4424 kvm_run->exit_reason = KVM_EXIT_INTR;
4428 if (guestdbg_exit_pending(vcpu) && !rc) {
4429 kvm_s390_prepare_debug_exit(vcpu);
4433 if (rc == -EREMOTE) {
4434 /* userspace support is needed, kvm_run has been prepared */
4438 disable_cpu_timer_accounting(vcpu);
4441 kvm_sigset_deactivate(vcpu);
4443 vcpu->stat.exit_userspace++;
/*
 * Store the vcpu status (floating point, general purpose, PSW, prefix, fpc,
 * TOD programmable register, CPU timer, clock comparator, access and
 * control registers) into guest memory using the lowcore save area offsets
 * relative to @gpa. For the two special @gpa values an architecture mode
 * byte of 1 is first written to location 163 (absolute resp. real); a plain
 * address is rebased by subtracting __LC_FPREGS_SAVE_AREA. On machines with
 * the vector facility the floating point registers are first converted out
 * of the vector register image. The results of the individual writes are
 * OR-ed together and any failure is reported as -EFAULT.
 */
4450 * store status at address
4451 * we use have two special cases:
4452 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4453 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4455 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4457 unsigned char archmode = 1;
4458 freg_t fprs[NUM_FPRS];
4463 px = kvm_s390_get_prefix(vcpu);
4464 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4465 if (write_guest_abs(vcpu, 163, &archmode, 1))
4468 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4469 if (write_guest_real(vcpu, 163, &archmode, 1))
4473 gpa -= __LC_FPREGS_SAVE_AREA;
4475 /* manually convert vector registers if necessary */
4476 if (MACHINE_HAS_VX) {
4477 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4478 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4481 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4482 vcpu->run->s.regs.fprs, 128);
4484 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4485 vcpu->run->s.regs.gprs, 128);
4486 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4487 &vcpu->arch.sie_block->gpsw, 16);
4488 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4490 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4491 &vcpu->run->s.regs.fpc, 4);
4492 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4493 &vcpu->arch.sie_block->todpr, 4);
4494 cputm = kvm_s390_get_cpu_timer(vcpu);
4495 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4497 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4498 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4500 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4501 &vcpu->run->s.regs.acrs, 64);
4502 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4503 &vcpu->arch.sie_block->gcr, 128);
4504 return rc ? -EFAULT : 0;
/*
 * Store status for a vcpu whose register state is currently loaded: the
 * fpc and access register copies in kvm_run are refreshed from the live
 * host state first, then kvm_s390_store_status_unloaded() does the actual
 * store at @addr.
 */
4507 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4510 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4511 * switch in the run ioctl. Let's update our copies before we save
4512 * it into the save area
4515 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4516 save_access_regs(vcpu->run->s.regs.acrs);
4518 return kvm_s390_store_status_unloaded(vcpu, addr);
/*
 * Disable IBS on one vcpu: a still pending ENABLE_IBS request is consumed
 * first so it cannot take effect afterwards, then DISABLE_IBS is issued as
 * a synchronous request.
 */
4521 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4523 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4524 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Apply __disable_ibs_on_vcpu() to every vcpu of @kvm. */
4527 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4530 struct kvm_vcpu *vcpu;
4532 kvm_for_each_vcpu(i, vcpu, kvm) {
4533 __disable_ibs_on_vcpu(vcpu);
/*
 * Enable IBS on one vcpu: a still pending DISABLE_IBS request is consumed
 * first, then ENABLE_IBS is issued as a synchronous request.
 */
4537 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4541 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4542 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/*
 * Move a vcpu out of the STOPPED state. Transitions are serialized by
 * kvm->arch.start_stop_lock. For protected vcpus the ultravisor is told
 * about the change to the operating state first. The number of vcpus that
 * are already started decides about IBS: with none, IBS is enabled for this
 * vcpu; with exactly one, IBS is disabled on all vcpus. CPUSTAT_STOPPED is
 * then cleared, the interrupt bits of the PSW mask are cleared for
 * protected vcpus, and a TLB flush is requested.
 * NOTE(review): the early checks presumably return without doing anything
 * when the vcpu is already running or the ultravisor call fails - confirm.
 */
4545 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4547 int i, online_vcpus, r = 0, started_vcpus = 0;
4549 if (!is_vcpu_stopped(vcpu))
4552 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4553 /* Only one cpu at a time may enter/leave the STOPPED state. */
4554 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4555 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4557 /* Let's tell the UV that we want to change into the operating state */
4558 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4559 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4561 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4566 for (i = 0; i < online_vcpus; i++) {
4567 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4571 if (started_vcpus == 0) {
4572 /* we're the only active VCPU -> speed it up */
4573 __enable_ibs_on_vcpu(vcpu);
4574 } else if (started_vcpus == 1) {
4576 * As we are starting a second VCPU, we have to disable
4577 * the IBS facility on all VCPUs to remove potentially
4578 * outstanding ENABLE requests.
4580 __disable_ibs_on_all_vcpus(vcpu->kvm);
4583 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4585 * The real PSW might have changed due to a RESTART interpreted by the
4586 * ultravisor. We block all interrupts and let the next sie exit
4589 if (kvm_s390_pv_cpu_is_protected(vcpu))
4590 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4592 * Another VCPU might have used IBS while we were offline.
4593 * Let's play safe and flush the VCPU at startup.
4595 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4596 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Move a vcpu into the STOPPED state. Transitions are serialized by
 * kvm->arch.start_stop_lock. For protected vcpus the ultravisor is told
 * about the change to the stopped state first. The stop interrupt is
 * cleared, CPUSTAT_STOPPED is set and IBS is disabled on this vcpu. If
 * exactly one started vcpu remains afterwards, IBS is enabled for it.
 * NOTE(review): the early checks presumably return without doing anything
 * when the vcpu is already stopped or the ultravisor call fails - confirm.
 */
4600 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4602 int i, online_vcpus, r = 0, started_vcpus = 0;
4603 struct kvm_vcpu *started_vcpu = NULL;
4605 if (is_vcpu_stopped(vcpu))
4608 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4609 /* Only one cpu at a time may enter/leave the STOPPED state. */
4610 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4611 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4613 /* Let's tell the UV that we want to change into the stopped state */
4614 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4615 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4617 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4622 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4623 kvm_s390_clear_stop_irq(vcpu);
4625 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4626 __disable_ibs_on_vcpu(vcpu);
4628 for (i = 0; i < online_vcpus; i++) {
4629 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4631 started_vcpu = vcpu->kvm->vcpus[i];
4635 if (started_vcpus == 1) {
4637 * As we only have one VCPU left, we want to enable the
4638 * IBS facility for that VCPU to speed it up.
4640 __enable_ibs_on_vcpu(started_vcpu);
4643 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Per-vcpu KVM_ENABLE_CAP handler. KVM_CAP_S390_CSS_SUPPORT switches on
 * css_support for the whole VM; the event is logged and traced only on the
 * first activation.
 */
4647 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4648 struct kvm_enable_cap *cap)
4656 case KVM_CAP_S390_CSS_SUPPORT:
4657 if (!vcpu->kvm->arch.css_support) {
4658 vcpu->kvm->arch.css_support = 1;
4659 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4660 trace_kvm_s390_enable_css(vcpu->kvm);
/*
 * Copy data between userspace (mop->buf) and the vcpu's SIDA area at
 * mop->sida_offset. The request is validated first: no flags may be set,
 * the size must be non-zero, and offset plus size must stay within
 * sida_size(). KVM_S390_MEMOP_SIDA_READ copies to userspace,
 * KVM_S390_MEMOP_SIDA_WRITE copies from userspace.
 */
4671 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4672 struct kvm_s390_mem_op *mop)
4674 void __user *uaddr = (void __user *)mop->buf;
4677 if (mop->flags || !mop->size)
/* catches size + sida_offset wrapping around before the range check below */
4679 if (mop->size + mop->sida_offset < mop->size)
4681 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4685 case KVM_S390_MEMOP_SIDA_READ:
4686 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4687 mop->sida_offset), mop->size))
4691 case KVM_S390_MEMOP_SIDA_WRITE:
4692 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4693 mop->sida_offset), uaddr, mop->size))
/*
 * Read or write guest logical memory on behalf of userspace. Only the
 * INJECT_EXCEPTION and CHECK_ONLY flags are accepted; mop->ar must be below
 * NUM_ACRS, the size non-zero and at most MEM_OP_MAX_SIZE, and there is a
 * separate check for protected vcpus. Unless CHECK_ONLY is set, a bounce
 * buffer of mop->size bytes is allocated with vmalloc(). With CHECK_ONLY
 * only check_gva_range() is run for the fetch/store access; otherwise data
 * moves via read_guest()/copy_to_user() or copy_from_user()/write_guest().
 * A positive result is injected into the guest as a program interrupt when
 * INJECT_EXCEPTION was requested.
 * NOTE(review): presumably protected vcpus are rejected here - confirm.
 */
4699 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4700 struct kvm_s390_mem_op *mop)
4702 void __user *uaddr = (void __user *)mop->buf;
4703 void *tmpbuf = NULL;
4705 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4706 | KVM_S390_MEMOP_F_CHECK_ONLY;
4708 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4711 if (mop->size > MEM_OP_MAX_SIZE)
4714 if (kvm_s390_pv_cpu_is_protected(vcpu))
4717 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4718 tmpbuf = vmalloc(mop->size);
4724 case KVM_S390_MEMOP_LOGICAL_READ:
4725 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4726 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4727 mop->size, GACC_FETCH);
4730 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4732 if (copy_to_user(uaddr, tmpbuf, mop->size))
4736 case KVM_S390_MEMOP_LOGICAL_WRITE:
4737 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4738 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4739 mop->size, GACC_STORE);
4742 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4746 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4750 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4751 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/*
 * Dispatcher for KVM_S390_MEM_OP: logical read/write operations go to
 * kvm_s390_guest_mem_op(), SIDA read/write operations to
 * kvm_s390_guest_sida_op(). kvm->srcu is read-locked around the call.
 */
4757 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4758 struct kvm_s390_mem_op *mop)
4762 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4765 case KVM_S390_MEMOP_LOGICAL_READ:
4766 case KVM_S390_MEMOP_LOGICAL_WRITE:
4767 r = kvm_s390_guest_mem_op(vcpu, mop);
4769 case KVM_S390_MEMOP_SIDA_READ:
4770 case KVM_S390_MEMOP_SIDA_WRITE:
4771 /* we are locked against sida going away by the vcpu->mutex */
4772 r = kvm_s390_guest_sida_op(vcpu, mop);
4778 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
/*
 * vcpu ioctls handled on the async path: KVM_S390_IRQ injects the
 * kvm_s390_irq copied from userspace; KVM_S390_INTERRUPT copies the older
 * kvm_s390_interrupt layout, converts it with s390int_to_s390irq() and
 * injects the result. Every other ioctl number yields -ENOIOCTLCMD.
 */
4782 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4783 unsigned int ioctl, unsigned long arg)
4785 struct kvm_vcpu *vcpu = filp->private_data;
4786 void __user *argp = (void __user *)arg;
4789 case KVM_S390_IRQ: {
4790 struct kvm_s390_irq s390irq;
4792 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4794 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4796 case KVM_S390_INTERRUPT: {
4797 struct kvm_s390_interrupt s390int;
4798 struct kvm_s390_irq s390irq = {};
4800 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4802 if (s390int_to_s390irq(&s390int, &s390irq))
4804 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4807 return -ENOIOCTLCMD;
/*
 * Dispatch the s390 vcpu ioctls.
 *
 * filp->private_data is the target vcpu. Depending on the ioctl, arg is
 * either passed through as a scalar (KVM_S390_STORE_STATUS,
 * KVM_S390_VCPU_FAULT) or used as a user pointer (argp) from which the
 * ioctl's argument structure is copied before the handler is called.
 */
4810 long kvm_arch_vcpu_ioctl(struct file *filp,
4811 unsigned int ioctl, unsigned long arg)
4813 struct kvm_vcpu *vcpu = filp->private_data;
4814 void __user *argp = (void __user *)arg;
/* The status store is done under the kvm->srcu read lock. */
4822 case KVM_S390_STORE_STATUS:
4823 idx = srcu_read_lock(&vcpu->kvm->srcu);
4824 r = kvm_s390_store_status_unloaded(vcpu, arg);
4825 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4827 case KVM_S390_SET_INITIAL_PSW: {
4831 if (copy_from_user(&psw, argp, sizeof(psw)))
4833 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
/*
 * The three reset flavours share one pattern: do the reset, and for a
 * protected vcpu additionally issue the matching UVC_CMD_CPU_RESET*
 * command via uv_cmd_nodata() and log its rc/rrc.
 */
4836 case KVM_S390_CLEAR_RESET:
4838 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4839 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4840 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4841 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4842 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4846 case KVM_S390_INITIAL_RESET:
4848 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4849 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4850 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4851 UVC_CMD_CPU_RESET_INITIAL,
4853 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4857 case KVM_S390_NORMAL_RESET:
4859 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4860 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4861 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4862 UVC_CMD_CPU_RESET, &rc, &rrc);
4863 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
/*
 * Get and set share the user copy of struct kvm_one_reg and differ only
 * in the handler that is called. A check for protected vcpus comes before
 * the user copy.
 */
4867 case KVM_SET_ONE_REG:
4868 case KVM_GET_ONE_REG: {
4869 struct kvm_one_reg reg;
4871 if (kvm_s390_pv_cpu_is_protected(vcpu))
4874 if (copy_from_user(&reg, argp, sizeof(reg)))
4876 if (ioctl == KVM_SET_ONE_REG)
4877 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4879 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
/*
 * The UCAS map/unmap ioctls are only compiled with
 * CONFIG_KVM_S390_UCONTROL. Both copy a struct kvm_s390_ucas_mapping from
 * user space, test kvm_is_ucontrol() and then operate on the vcpu's own
 * gmap (vcpu->arch.gmap).
 */
4882 #ifdef CONFIG_KVM_S390_UCONTROL
4883 case KVM_S390_UCAS_MAP: {
4884 struct kvm_s390_ucas_mapping ucasmap;
4886 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4891 if (!kvm_is_ucontrol(vcpu->kvm)) {
4896 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4897 ucasmap.vcpu_addr, ucasmap.length);
4900 case KVM_S390_UCAS_UNMAP: {
4901 struct kvm_s390_ucas_mapping ucasmap;
4903 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4908 if (!kvm_is_ucontrol(vcpu->kvm)) {
4913 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
/* arg is handed to gmap_fault() unchanged, together with the vcpu's gmap. */
4918 case KVM_S390_VCPU_FAULT: {
4919 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4922 case KVM_ENABLE_CAP:
4924 struct kvm_enable_cap cap;
4926 if (copy_from_user(&cap, argp, sizeof(cap)))
4928 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
/* The memory operation is only attempted when the user copy succeeded. */
4931 case KVM_S390_MEM_OP: {
4932 struct kvm_s390_mem_op mem_op;
4934 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4935 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
/*
 * The supplied length is validated before use: it must be non-zero, at
 * most VCPU_IRQS_MAX_BUF, and a whole multiple of
 * sizeof(struct kvm_s390_irq).
 */
4940 case KVM_S390_SET_IRQ_STATE: {
4941 struct kvm_s390_irq_state irq_state;
4944 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4946 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4947 irq_state.len == 0 ||
4948 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4952 /* do not use irq_state.flags, it will break old QEMUs */
4953 r = kvm_s390_set_irq_state(vcpu,
4954 (void __user *) irq_state.buf,
/* For the read direction only a zero length is checked here. */
4958 case KVM_S390_GET_IRQ_STATE: {
4959 struct kvm_s390_irq_state irq_state;
4962 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4964 if (irq_state.len == 0) {
4968 /* do not use irq_state.flags, it will break old QEMUs */
4969 r = kvm_s390_get_irq_state(vcpu,
4970 (__u8 __user *) irq_state.buf,
/*
 * Fault handler for the vcpu mapping.
 *
 * With CONFIG_KVM_S390_UCONTROL, a fault at page offset
 * KVM_S390_SIE_PAGE_OFFSET on a ucontrol VM is resolved to the page that
 * holds vcpu->arch.sie_block, and a reference is taken on that page.
 * The remaining path returns VM_FAULT_SIGBUS.
 */
4982 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4984 #ifdef CONFIG_KVM_S390_UCONTROL
4985 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4986 && (kvm_is_ucontrol(vcpu->kvm))) {
4987 vmf->page = virt_to_page(vcpu->arch.sie_block);
/* Take a reference on the page stored in vmf->page. */
4988 get_page(vmf->page);
4992 return VM_FAULT_SIGBUS;
4995 /* Section: memory related */
/*
 * Validate a memory slot change before it is committed.
 *
 * The checks below test that the user address and the size have their
 * low 20 bits clear (mask 0xfffff), that the end of the guest physical
 * range is compared against kvm->arch.mem_limit, and whether the VM has
 * a protected-virtualization handle (kvm_s390_pv_get_handle()).
 */
4996 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4997 struct kvm_memory_slot *memslot,
4998 const struct kvm_userspace_memory_region *mem,
4999 enum kvm_mr_change change)
5001 /* A few sanity checks. We can have memory slots which have to be
5002 located/ended at a segment boundary (1MB). The memory in userland is
5003 ok to be fragmented into various different vmas. It is okay to mmap()
5004 and munmap() stuff in this slot after doing this call at any time */
5006 if (mem->userspace_addr & 0xffffful)
5009 if (mem->memory_size & 0xffffful)
5012 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5015 /* When we are protected, we should not change the memory slots */
5016 if (kvm_s390_pv_get_handle(kvm))
/*
 * Apply a committed memory slot change to the VM's guest address space
 * (kvm->arch.gmap).
 *
 * The old slot's range (old->base_gfn and old->npages, scaled by
 * PAGE_SIZE) is removed with gmap_unmap_segment(); the new range described
 * by mem is added with gmap_map_segment(). KVM_MR_FLAGS_ONLY has its own
 * case, and an unrecognised change value triggers a WARN. The function
 * returns void, so a failure is only reported through pr_warn().
 *
 * NOTE(review): presumably the unmap steps belong to delete/move and the
 * map step to create/move - confirm against the case labels.
 */
5021 void kvm_arch_commit_memory_region(struct kvm *kvm,
5022 const struct kvm_userspace_memory_region *mem,
5023 struct kvm_memory_slot *old,
5024 const struct kvm_memory_slot *new,
5025 enum kvm_mr_change change)
5031 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5032 old->npages * PAGE_SIZE);
5035 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5036 old->npages * PAGE_SIZE);
5041 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5042 mem->guest_phys_addr, mem->memory_size);
5044 case KVM_MR_FLAGS_ONLY:
5047 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5050 pr_warn("failed to commit memory region\n");
/*
 * Compute the mask that kvm_s390_init() applies to facility word i.
 *
 * A two-bit field is taken from sclp.hmfai: the value is shifted left by
 * 2 * i and the top two bits are kept (this assumes hmfai is a 32-bit
 * unsigned value - TODO confirm). The base mask 0x0000ffffffffffff is then
 * shifted right by 16 bits per unit of that field, so 0 keeps all 48 bits,
 * 1 keeps 32, 2 keeps 16 and 3 leaves an empty mask.
 */
5054 static inline unsigned long nonhyp_mask(int i)
5056 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
/* nonhyp_fai << 4 equals nonhyp_fai * 16, i.e. the shift distance in bits. */
5058 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/* Clear the vcpu's valid_wakeup flag. */
5061 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5063 vcpu->valid_wakeup = false;
/*
 * Module initialisation, registered through module_init().
 *
 * Two conditions are checked first, each logging a message when it
 * triggers: sclp.has_sief2 not being set, and nested and hpage both being
 * set. The first 16 words of kvm_s390_fac_base are then OR-ed with the
 * corresponding stfle_fac_list words, each filtered by nonhyp_mask().
 * Finally the result of kvm_init() is returned.
 */
5066 static int __init kvm_s390_init(void)
5070 if (!sclp.has_sief2) {
5071 pr_info("SIE is not available\n");
5075 if (nested && hpage) {
5076 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
/* Only bits that pass nonhyp_mask(i) are added to the base facility list. */
5080 for (i = 0; i < 16; i++)
5081 kvm_s390_fac_base[i] |=
5082 stfle_fac_list[i] & nonhyp_mask(i);
5084 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/* Module unload hook, registered through module_exit(). */
5087 static void __exit kvm_s390_exit(void)
/* Register the module's load and unload entry points. */
5092 module_init(kvm_s390_init);
5093 module_exit(kvm_s390_exit);
5096 * Enable autoloading of the kvm module.
5097 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5098 * since x86 takes a different approach.
5100 #include <linux/miscdevice.h>
5101 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5102 MODULE_ALIAS("devname:kvm");