// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/fpu/api.h>

#define CREATE_TRACE_POINTS
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */

#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};
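/*
 * The offsets above encode the layout of the binary stats blob exposed
 * to userspace: the fixed-size header comes first, followed by the id
 * string (KVM_STATS_NAME_SIZE bytes), the descriptor array, and finally
 * the data values, i.e. data_offset == desc_offset + sizeof(desc array).
 */
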
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
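/*
 * Illustrative usage from the host side (example values only, assuming
 * kvm is built as a module):
 *
 *	modprobe kvm nested=1 hpage=0
 *	echo 100 > /sys/module/kvm/parameters/diag9c_forwarding_hz
 */
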
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}
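/*
 * With SIZE_INTERNAL == 16 doublewords, the masks above cover
 * 16 * 64 == 1024 facility bits.
 */
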
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
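/*
 * A sketch of the arithmetic above: epoch and epdx together form a
 * 128-bit signed value <epdx:epoch>. The negated delta is added to the
 * low 64 bits; delta_idx sign-extends it into epdx, and the
 * "scb->epoch < delta" test is the usual unsigned-overflow check, e.g.
 * epoch = 0xfffffffffffffffe plus a delta of 4 wraps to 2, which is
 * smaller than 4, so one more is carried into epdx.
 */
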
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
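/*
 * The bitmap below is filled via set_bit_inv(), i.e. with MSB-0 bit
 * numbering, so that the feature bit numbers match the architected
 * numbering rather than the Linux LSB-0 convention.
 */
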
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

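	/*
	 * The PLO query bitmap above is packed MSB first: function 0
	 * lands in bit 0x80 of byte 0, function 9 in bit 0x40 of
	 * byte 1, and so on.
	 */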
	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
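/*
 * Illustrative (hypothetical) userspace invocation of the ioctl above,
 * issued on an open /dev/kvm file descriptor:
 *
 *	ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0);
 */
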
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
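/*
 * Each iteration of the loop above covers one segment, i.e.
 * _PAGE_ENTRIES (256) consecutive 4K pages or 1 MB of guest memory,
 * the granularity at which gmap_sync_dirty_log_pmd() reports dirtiness.
 */
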
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
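/*
 * Illustrative (hypothetical) userspace usage of one of the VM
 * capabilities handled above, issued on the VM file descriptor:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */
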
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
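/*
 * Illustrative (hypothetical) userspace sequence to toggle migration
 * mode through the attribute interface above:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr = KVM_S390_VM_MIGRATION_START,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
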
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
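/*
 * The "gtod->tod < clk.tod" test above detects the carry out of the
 * unsigned 64-bit addition (a + b < a implies a wrap); together with
 * the epoch index this amounts to a 128-bit add of the guest epoch to
 * the host TOD.
 */
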
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
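/*
 * Illustrative (hypothetical) userspace usage of KVM_S390_GET_SKEYS,
 * fetching one storage key per page for the first 128 guest frames:
 *
 *	uint8_t skeys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 128,
 *		.skeydata_addr = (__u64)(unsigned long)skeys,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */
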
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
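/*
 * Worked example: on 64-bit, KVM_S390_MAX_BIT_DISTANCE is 16, so up to
 * 16 clean values may be cheaper to transmit inline than the base
 * address and length needed to start a new block.
 */
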
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->last_used_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (start >= slots->used_slots)
		return slots->used_slots - 1;

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->last_used_slot, start);
	}

	return start;
}
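/*
 * The memslots array is kept sorted by descending base_gfn, which is
 * why the binary search above moves "end" down when gfn >= base_gfn.
 * For a gfn in a hole, e.g. slots covering [0x0, 0x100) and
 * [0x200, 0x300) queried with 0x180, the index of one of the two
 * bordering slots is returned.
 */
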
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
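/*
 * The 0x43 mask above keeps the CMMA result bits after the shift: the
 * two usage-state bits (0x03) and the NODAT bit (0x40), which is what
 * KVM_S390_GET_CMMA_BITS reports to userspace.
 */
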
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
2027 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2028 u8 *res, unsigned long bufsize)
2030 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2031 struct kvm_memslots *slots = kvm_memslots(kvm);
2032 struct kvm_memory_slot *ms;
2034 if (unlikely(!slots->used_slots))
2037 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2038 ms = gfn_to_memslot(kvm, cur_gfn);
2040 args->start_gfn = cur_gfn;
2043 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2044 mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2046 while (args->count < bufsize) {
2047 hva = gfn_to_hva(kvm, cur_gfn);
2048 if (kvm_is_error_hva(hva))
2050 /* Decrement only if we actually flipped the bit to 0 */
2051 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2052 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2053 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2055 /* Save the value */
2056 res[args->count++] = (pgstev >> 24) & 0x43;
2057 /* If the next bit is too far away, stop. */
2058 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2060 /* If we reached the previous "next", find the next one */
2061 if (cur_gfn == next_gfn)
2062 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2063 /* Reached the end of memory or of the buffer, stop */
2064 if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			break;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}

	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
2086 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2087 struct kvm_s390_cmma_log *args)
2089 unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
2095 /* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
2102 /* CMMA is disabled or was not used, or the buffer has length zero */
2103 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2104 if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;
2118 mmap_read_lock(kvm->mm);
2119 srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2124 srcu_read_unlock(&kvm->srcu, srcu_idx);
2125 mmap_read_unlock(kvm->mm);
2127 if (kvm->arch.migration_mode)
2128 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
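/*
 * Userspace usage sketch for the KVM_S390_GET_CMMA_BITS ioctl served by
 * the function above (illustrative only; vm_fd and buf are assumed to
 * exist, error handling is omitted):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buflen,
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(uintptr_t)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *	// on return, log.start_gfn is the first gfn described by buf,
 *	// log.count the number of valid bytes and log.remaining the
 *	// number of dirty values still pending (in migration mode)
 */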
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
2144 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2145 const struct kvm_s390_cmma_log *args)
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
2155 /* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
2158 /* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}
2175 mmap_read_lock(kvm->mm);
2176 srcu_idx = srcu_read_lock(&kvm->srcu);
2177 for (i = 0; i < args->count; i++) {
2178 hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
2185 pgstev = pgstev << 24;
2186 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2187 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2189 srcu_read_unlock(&kvm->srcu, srcu_idx);
2190 mmap_read_unlock(kvm->mm);
2192 if (!kvm->mm->context.uses_cmm) {
2193 mmap_write_lock(kvm->mm);
2194 kvm->mm->context.uses_cmm = 1;
		mmap_write_unlock(kvm->mm);
	}
out:
	vfree(bits);
	return r;
}
2202 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
	struct kvm_vcpu *vcpu;
	u16 rc, rrc;
	int ret = 0;
	int i;

	/*
	 * We ignore failures and try to destroy as many CPUs as possible.
	 * At the same time we must not free the assigned resources when
	 * this fails, as the ultravisor still has access to that memory.
	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
	 * behind.
	 * We want to return the first failure rc and rrc, though.
	 */
2217 kvm_for_each_vcpu(i, vcpu, kvm) {
2218 mutex_lock(&vcpu->mutex);
		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
			*rcp = rc;
			*rrcp = rrc;
			ret = -EIO;
		}
		mutex_unlock(&vcpu->mutex);
	}
	return ret;
}
2229 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
	struct kvm_vcpu *vcpu;
	u16 dummy;
	int i, r = 0;
2236 kvm_for_each_vcpu(i, vcpu, kvm) {
2237 mutex_lock(&vcpu->mutex);
2238 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
		mutex_unlock(&vcpu->mutex);
		if (r)
			break;
	}
	if (r)
		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	return r;
}
2248 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2252 void __user *argp = (void __user *)cmd->data;
2255 case KVM_PV_ENABLE: {
2257 if (kvm_s390_pv_is_protected(kvm))
2261 * FMT 4 SIE needs esca. As we never switch back to bsca from
2262 * esca, we need no cleanup in the error cases below
2264 r = sca_switch_to_extended(kvm);
2268 mmap_write_lock(current->mm);
2269 r = gmap_mark_unmergeable();
2270 mmap_write_unlock(current->mm);
2274 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2278 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2282 /* we need to block service interrupts from now on */
2283 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2286 case KVM_PV_DISABLE: {
2288 if (!kvm_s390_pv_is_protected(kvm))
2291 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
		/*
		 * If a CPU could not be destroyed, destroy VM will also fail.
		 * There is no point in trying to destroy it. Instead return
		 * the rc and rrc from the first CPU that failed destroying.
		 */
		if (r)
			break;
		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2301 /* no need to block service interrupts any more */
2302 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2305 case KVM_PV_SET_SEC_PARMS: {
2306 struct kvm_s390_pv_sec_parm parms = {};
2310 if (!kvm_s390_pv_is_protected(kvm))
2314 if (copy_from_user(&parms, argp, sizeof(parms)))
2317 /* Currently restricted to 8KB */
		r = -EINVAL;
		if (parms.length > PAGE_SIZE * 2)
			break;

		r = -ENOMEM;
		hdr = vmalloc(parms.length);
		if (!hdr)
			break;

		r = -EFAULT;
		if (!copy_from_user(hdr, (void __user *)parms.origin,
				    parms.length))
			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
						      &cmd->rc, &cmd->rrc);

		vfree(hdr);
		break;
	}
2336 case KVM_PV_UNPACK: {
2337 struct kvm_s390_pv_unp unp = {};
2340 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2344 if (copy_from_user(&unp, argp, sizeof(unp)))
2347 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2348 &cmd->rc, &cmd->rrc);
2351 case KVM_PV_VERIFY: {
2353 if (!kvm_s390_pv_is_protected(kvm))
2356 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2357 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
			     cmd->rrc);
		break;
	}
2362 case KVM_PV_PREP_RESET: {
2364 if (!kvm_s390_pv_is_protected(kvm))
2367 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2368 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
2373 case KVM_PV_UNSHARE_ALL: {
2375 if (!kvm_s390_pv_is_protected(kvm))
2378 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2379 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
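/*
 * A sketch of the ordering userspace typically drives through the
 * commands above when booting a protected guest (not extra kernel code):
 * KVM_PV_ENABLE converts the VM and its CPUs, KVM_PV_SET_SEC_PARMS hands
 * over the SE header, one or more KVM_PV_UNPACK calls decrypt the image
 * into guest memory, and KVM_PV_VERIFY checks the result before the
 * guest is started.
 */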
2390 long kvm_arch_vm_ioctl(struct file *filp,
2391 unsigned int ioctl, unsigned long arg)
2393 struct kvm *kvm = filp->private_data;
2394 void __user *argp = (void __user *)arg;
2395 struct kvm_device_attr attr;
2399 case KVM_S390_INTERRUPT: {
2400 struct kvm_s390_interrupt s390int;
2403 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2405 r = kvm_s390_inject_vm(kvm, &s390int);
2408 case KVM_CREATE_IRQCHIP: {
2409 struct kvm_irq_routing_entry routing;
2412 if (kvm->arch.use_irqchip) {
2413 /* Set up dummy routing. */
2414 memset(&routing, 0, sizeof(routing));
2415 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2419 case KVM_SET_DEVICE_ATTR: {
2421 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2423 r = kvm_s390_vm_set_attr(kvm, &attr);
2426 case KVM_GET_DEVICE_ATTR: {
2428 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2430 r = kvm_s390_vm_get_attr(kvm, &attr);
2433 case KVM_HAS_DEVICE_ATTR: {
2435 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2437 r = kvm_s390_vm_has_attr(kvm, &attr);
2440 case KVM_S390_GET_SKEYS: {
2441 struct kvm_s390_skeys args;
2444 if (copy_from_user(&args, argp,
2445 sizeof(struct kvm_s390_skeys)))
2447 r = kvm_s390_get_skeys(kvm, &args);
2450 case KVM_S390_SET_SKEYS: {
2451 struct kvm_s390_skeys args;
2454 if (copy_from_user(&args, argp,
2455 sizeof(struct kvm_s390_skeys)))
2457 r = kvm_s390_set_skeys(kvm, &args);
2460 case KVM_S390_GET_CMMA_BITS: {
2461 struct kvm_s390_cmma_log args;
2464 if (copy_from_user(&args, argp, sizeof(args)))
2466 mutex_lock(&kvm->slots_lock);
2467 r = kvm_s390_get_cmma_bits(kvm, &args);
2468 mutex_unlock(&kvm->slots_lock);
2470 r = copy_to_user(argp, &args, sizeof(args));
2476 case KVM_S390_SET_CMMA_BITS: {
2477 struct kvm_s390_cmma_log args;
2480 if (copy_from_user(&args, argp, sizeof(args)))
2482 mutex_lock(&kvm->slots_lock);
2483 r = kvm_s390_set_cmma_bits(kvm, &args);
2484 mutex_unlock(&kvm->slots_lock);
2487 case KVM_S390_PV_COMMAND: {
2488 struct kvm_pv_cmd args;
2490 /* protvirt means user sigp */
2491 kvm->arch.user_cpu_state_ctrl = 1;
2493 if (!is_prot_virt_host()) {
2497 if (copy_from_user(&args, argp, sizeof(args))) {
2505 mutex_lock(&kvm->lock);
2506 r = kvm_s390_handle_pv(kvm, &args);
2507 mutex_unlock(&kvm->lock);
2508 if (copy_to_user(argp, &args, sizeof(args))) {
2521 static int kvm_s390_apxa_installed(void)
2523 struct ap_config_info info;
2525 if (ap_instructions_available()) {
2526 if (ap_qci(&info) == 0)
/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility is installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed.
 */
2541 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2543 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2545 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2546 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2548 /* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
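/*
 * Illustration (a sketch, address made up): with the CRYCB at 0x12345000
 * and both MSAX3 and APXA installed, the function above produces
 *
 *	crycbd = 0x12345000 | CRYCB_FORMAT2
 *
 * i.e. the format rides in low-order bits that are always zero in the
 * aligned CRYCB address.
 */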
2558 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2559 unsigned long *aqm, unsigned long *adm)
2561 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2563 mutex_lock(&kvm->lock);
2564 kvm_s390_vcpu_block_all(kvm);
2566 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2567 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2568 memcpy(crycb->apcb1.apm, apm, 32);
2569 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2570 apm[0], apm[1], apm[2], apm[3]);
2571 memcpy(crycb->apcb1.aqm, aqm, 32);
2572 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2573 aqm[0], aqm[1], aqm[2], aqm[3]);
2574 memcpy(crycb->apcb1.adm, adm, 32);
2575 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2576 adm[0], adm[1], adm[2], adm[3]);
		break;
	case CRYCB_FORMAT1:
	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
		memcpy(crycb->apcb0.apm, apm, 8);
		memcpy(crycb->apcb0.aqm, aqm, 2);
		memcpy(crycb->apcb0.adm, adm, 2);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
			 apm[0], *((unsigned short *)aqm),
			 *((unsigned short *)adm));
		break;
	default:	/* Cannot happen */
		break;
	}
2591 /* recreate the shadow crycb for each vcpu */
2592 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2593 kvm_s390_vcpu_unblock_all(kvm);
2594 mutex_unlock(&kvm->lock);
2596 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
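/*
 * A minimal caller sketch (modeled on what the vfio_ap driver does; the
 * chosen adapter and domain numbers are made up). Each mask is a 256-bit
 * bitmap in inverted (MSB-first) bit order:
 *
 *	DECLARE_BITMAP(apm, 256) = { 0 };
 *	DECLARE_BITMAP(aqm, 256) = { 0 };
 *	DECLARE_BITMAP(adm, 256) = { 0 };
 *
 *	set_bit_inv(3, apm);	// allow AP adapter 3
 *	set_bit_inv(7, aqm);	// allow AP usage domain 7
 *	set_bit_inv(7, adm);	// allow control domain 7
 *	kvm_arch_crypto_set_masks(kvm, apm, aqm, adm);
 */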
2598 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2600 mutex_lock(&kvm->lock);
2601 kvm_s390_vcpu_block_all(kvm);
2603 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2604 sizeof(kvm->arch.crypto.crycb->apcb0));
2605 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2606 sizeof(kvm->arch.crypto.crycb->apcb1));
2608 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2609 /* recreate the shadow crycb for each vcpu */
2610 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2611 kvm_s390_vcpu_unblock_all(kvm);
2612 mutex_unlock(&kvm->lock);
2614 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2616 static u64 kvm_s390_get_initial_cpuid(void)
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
2622 return *((u64 *) &cpuid);
2625 static void kvm_s390_crypto_init(struct kvm *kvm)
2627 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2628 kvm_s390_set_crycb_format(kvm);
	if (!test_kvm_facility(kvm, 76))
		return;
2633 /* Enable AES/DEA protected key functions by default */
2634 kvm->arch.crypto.aes_kw = 1;
2635 kvm->arch.crypto.dea_kw = 1;
2636 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2637 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2638 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2639 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2642 static void sca_dispose(struct kvm *kvm)
2644 if (kvm->arch.use_esca)
2645 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
2648 kvm->arch.sca = NULL;
2651 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2653 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2655 char debug_name[16];
2656 static unsigned long sca_offset;
2659 #ifdef CONFIG_KVM_S390_UCONTROL
2660 if (type & ~KVM_VM_S390_UCONTROL)
2662 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2669 rc = s390_enable_sie();
2675 if (!sclp.has_64bscao)
2676 alloc_flags |= GFP_DMA;
2677 rwlock_init(&kvm->arch.sca_lock);
2678 /* start with basic SCA */
2679 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2682 mutex_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
2686 kvm->arch.sca = (struct bsca_block *)
2687 ((char *) kvm->arch.sca + sca_offset);
2688 mutex_unlock(&kvm_lock);
2690 sprintf(debug_name, "kvm-%u", current->pid);
2692 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2696 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2697 kvm->arch.sie_page2 =
2698 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2699 if (!kvm->arch.sie_page2)
2702 kvm->arch.sie_page2->kvm = kvm;
2703 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2705 for (i = 0; i < kvm_s390_fac_size(); i++) {
2706 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2707 (kvm_s390_fac_base[i] |
2708 kvm_s390_fac_ext[i]);
2709 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2710 kvm_s390_fac_base[i];
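	/*
	 * An explanatory aside: fac_mask caps what any guest on this host
	 * may ever use, while fac_list is the per-VM view that userspace
	 * can still shrink via the CPU model attributes; both are derived
	 * from the host's STFLE facility bits.
	 */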
2712 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2714 /* we are always in czam mode - even on pre z14 machines */
2715 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2716 set_kvm_facility(kvm->arch.model.fac_list, 138);
2717 /* we emulate STHYI in kvm */
2718 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2719 set_kvm_facility(kvm->arch.model.fac_list, 74);
2720 if (MACHINE_HAS_TLB_GUEST) {
2721 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2722 set_kvm_facility(kvm->arch.model.fac_list, 147);
2725 if (css_general_characteristics.aiv && test_facility(65))
2726 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2728 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2729 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2731 kvm_s390_crypto_init(kvm);
2733 mutex_init(&kvm->arch.float_int.ais_lock);
2734 spin_lock_init(&kvm->arch.float_int.lock);
2735 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2736 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2737 init_waitqueue_head(&kvm->arch.ipte_wq);
2738 mutex_init(&kvm->arch.ipte_mutex);
2740 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2741 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2743 if (type & KVM_VM_S390_UCONTROL) {
2744 kvm->arch.gmap = NULL;
2745 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2747 if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2753 if (!kvm->arch.gmap)
2755 kvm->arch.gmap->private = kvm;
2756 kvm->arch.gmap->pfault_enabled = 0;
2759 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2760 kvm->arch.use_skf = sclp.has_skey;
2761 spin_lock_init(&kvm->arch.start_stop_lock);
2762 kvm_s390_vsie_init(kvm);
2764 kvm_s390_gisa_init(kvm);
2765 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2769 free_page((unsigned long)kvm->arch.sie_page2);
2770 debug_unregister(kvm->arch.dbf);
2772 KVM_EVENT(3, "creation of vm failed: %d", rc);
2776 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2780 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2781 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2782 kvm_s390_clear_local_irqs(vcpu);
2783 kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);
2787 if (kvm_is_ucontrol(vcpu->kvm))
2788 gmap_remove(vcpu->arch.gmap);
2790 if (vcpu->kvm->arch.use_cmma)
2791 kvm_s390_vcpu_unsetup_cmma(vcpu);
	/* We cannot hold the vcpu mutex here, we are already dying */
2793 if (kvm_s390_pv_cpu_get_handle(vcpu))
2794 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2795 free_page((unsigned long)(vcpu->arch.sie_block));
2798 static void kvm_free_vcpus(struct kvm *kvm)
2801 struct kvm_vcpu *vcpu;
2803 kvm_for_each_vcpu(i, vcpu, kvm)
2804 kvm_vcpu_destroy(vcpu);
2806 mutex_lock(&kvm->lock);
2807 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2808 kvm->vcpus[i] = NULL;
2810 atomic_set(&kvm->online_vcpus, 0);
2811 mutex_unlock(&kvm->lock);
2814 void kvm_arch_destroy_vm(struct kvm *kvm)
2818 kvm_free_vcpus(kvm);
2820 kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
2827 if (kvm_s390_pv_get_handle(kvm))
2828 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2829 debug_unregister(kvm->arch.dbf);
2830 free_page((unsigned long)kvm->arch.sie_page2);
2831 if (!kvm_is_ucontrol(kvm))
2832 gmap_remove(kvm->arch.gmap);
2833 kvm_s390_destroy_adapters(kvm);
2834 kvm_s390_clear_float_irqs(kvm);
2835 kvm_s390_vsie_destroy(kvm);
2836 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2839 /* Section: vcpu related */
2840 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2842 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
2845 vcpu->arch.gmap->private = vcpu->kvm;
2850 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
	if (!kvm_s390_use_sca_entries())
		return;
2854 read_lock(&vcpu->kvm->arch.sca_lock);
2855 if (vcpu->kvm->arch.use_esca) {
2856 struct esca_block *sca = vcpu->kvm->arch.sca;
2858 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2859 sca->cpu[vcpu->vcpu_id].sda = 0;
2861 struct bsca_block *sca = vcpu->kvm->arch.sca;
2863 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2864 sca->cpu[vcpu->vcpu_id].sda = 0;
2866 read_unlock(&vcpu->kvm->arch.sca_lock);
2869 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2871 if (!kvm_s390_use_sca_entries()) {
2872 struct bsca_block *sca = vcpu->kvm->arch.sca;
2874 /* we still need the basic sca for the ipte control */
2875 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2876 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2879 read_lock(&vcpu->kvm->arch.sca_lock);
2880 if (vcpu->kvm->arch.use_esca) {
2881 struct esca_block *sca = vcpu->kvm->arch.sca;
2883 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2884 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2885 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2886 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2887 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2889 struct bsca_block *sca = vcpu->kvm->arch.sca;
2891 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2892 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2893 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2894 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2896 read_unlock(&vcpu->kvm->arch.sca_lock);
2899 /* Basic SCA to Extended SCA data copy routines */
2900 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
2904 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2907 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2911 d->ipte_control = s->ipte_control;
2913 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2914 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2917 static int sca_switch_to_extended(struct kvm *kvm)
2919 struct bsca_block *old_sca = kvm->arch.sca;
2920 struct esca_block *new_sca;
2921 struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	if (kvm->arch.use_esca)
		return 0;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;
2932 scaoh = (u32)((u64)(new_sca) >> 32);
2933 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2935 kvm_s390_vcpu_block_all(kvm);
2936 write_lock(&kvm->arch.sca_lock);
2938 sca_copy_b_to_e(new_sca, old_sca);
2940 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2941 vcpu->arch.sie_block->scaoh = scaoh;
2942 vcpu->arch.sie_block->scaol = scaol;
2943 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2945 kvm->arch.sca = new_sca;
2946 kvm->arch.use_esca = 1;
2948 write_unlock(&kvm->arch.sca_lock);
2949 kvm_s390_vcpu_unblock_all(kvm);
2951 free_page((unsigned long)old_sca);
2953 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);

	return 0;
}
2958 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2962 if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;
2972 mutex_lock(&kvm->lock);
2973 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2974 mutex_unlock(&kvm->lock);
2976 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
/* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2980 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2982 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2983 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2984 vcpu->arch.cputm_start = get_tod_clock_fast();
2985 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
/* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2989 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2991 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2992 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2993 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2994 vcpu->arch.cputm_start = 0;
2995 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
/* needs preemption disabled to protect from TOD sync and vcpu_load/put */
2999 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3001 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3002 vcpu->arch.cputm_enabled = true;
3003 __start_cpu_timer_accounting(vcpu);
/* needs preemption disabled to protect from TOD sync and vcpu_load/put */
3007 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3009 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3010 __stop_cpu_timer_accounting(vcpu);
3011 vcpu->arch.cputm_enabled = false;
3014 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3016 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3021 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3023 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3028 /* set the cpu timer - may only be called from the VCPU thread itself */
3029 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3031 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3032 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3033 if (vcpu->arch.cputm_enabled)
3034 vcpu->arch.cputm_start = get_tod_clock_fast();
3035 vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
3040 /* update and get the cpu timer - can also be called from other VCPU threads */
3041 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
3056 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3057 value = vcpu->arch.sie_block->cputm;
3058 /* if cputm_start is 0, accounting is being started/stopped */
3059 if (likely(vcpu->arch.cputm_start))
3060 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();

	return value;
}
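/*
 * A note on the "seq & ~1" retry condition above: raw_read_seqcount()
 * may return an odd value while the VCPU thread is mid-update. Clearing
 * the low bit makes read_seqcount_retry() see a count that cannot match
 * the post-update value, so such a reader simply loops once more; the
 * WARN above catches the one case that cannot make progress, a reader
 * interrupting the writer on the vcpu's own CPU.
 */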
3066 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3069 gmap_enable(vcpu->arch.enabled_gmap);
3070 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3071 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3072 __start_cpu_timer_accounting(vcpu);
3076 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3079 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3080 __stop_cpu_timer_accounting(vcpu);
3081 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3082 vcpu->arch.enabled_gmap = gmap_get_enabled();
3083 gmap_disable(vcpu->arch.enabled_gmap);
3087 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3089 mutex_lock(&vcpu->kvm->lock);
3091 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3092 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3094 mutex_unlock(&vcpu->kvm->lock);
3095 if (!kvm_is_ucontrol(vcpu->kvm)) {
3096 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3099 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3100 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3101 /* make vcpu_load load the right gmap on the first trigger */
3102 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3105 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3107 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3108 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3113 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3115 /* At least one ECC subfunction must be present */
3116 return kvm_has_pckmo_subfunc(kvm, 32) ||
3117 kvm_has_pckmo_subfunc(kvm, 33) ||
3118 kvm_has_pckmo_subfunc(kvm, 34) ||
3119 kvm_has_pckmo_subfunc(kvm, 40) ||
3120 kvm_has_pckmo_subfunc(kvm, 41);
3124 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3127 * If the AP instructions are not being interpreted and the MSAX3
3128 * facility is not configured for the guest, there is nothing to set up.
3130 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3133 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3134 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3135 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3136 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3138 if (vcpu->kvm->arch.crypto.apie)
3139 vcpu->arch.sie_block->eca |= ECA_APIE;
3141 /* Set up protected key support */
3142 if (vcpu->kvm->arch.crypto.aes_kw) {
3143 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3144 /* ecc is also wrapped with AES key */
3145 if (kvm_has_pckmo_ecc(vcpu->kvm))
3146 vcpu->arch.sie_block->ecd |= ECD_ECC;
3149 if (vcpu->kvm->arch.crypto.dea_kw)
3150 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3153 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3155 free_page(vcpu->arch.sie_block->cbrlo);
3156 vcpu->arch.sie_block->cbrlo = 0;
3159 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3161 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3162 if (!vcpu->arch.sie_block->cbrlo)
3167 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3169 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3171 vcpu->arch.sie_block->ibc = model->ibc;
3172 if (test_kvm_facility(vcpu->kvm, 7))
3173 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3176 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3181 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3185 if (test_kvm_facility(vcpu->kvm, 78))
3186 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3187 else if (test_kvm_facility(vcpu->kvm, 8))
3188 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3190 kvm_s390_vcpu_setup_model(vcpu);
3192 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3193 if (MACHINE_HAS_ESOP)
3194 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3195 if (test_kvm_facility(vcpu->kvm, 9))
3196 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3197 if (test_kvm_facility(vcpu->kvm, 73))
3198 vcpu->arch.sie_block->ecb |= ECB_TE;
3200 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3201 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3202 if (test_kvm_facility(vcpu->kvm, 130))
3203 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3204 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3206 vcpu->arch.sie_block->eca |= ECA_CEI;
3208 vcpu->arch.sie_block->eca |= ECA_IB;
3210 vcpu->arch.sie_block->eca |= ECA_SII;
3211 if (sclp.has_sigpif)
3212 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3213 if (test_kvm_facility(vcpu->kvm, 129)) {
3214 vcpu->arch.sie_block->eca |= ECA_VX;
3215 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3217 if (test_kvm_facility(vcpu->kvm, 139))
3218 vcpu->arch.sie_block->ecd |= ECD_MEF;
3219 if (test_kvm_facility(vcpu->kvm, 156))
3220 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3221 if (vcpu->arch.sie_block->gd) {
3222 vcpu->arch.sie_block->eca |= ECA_AIV;
3223 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3224 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3226 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3228 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3231 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3233 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3235 if (vcpu->kvm->arch.use_cmma) {
3236 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3240 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3241 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3243 vcpu->arch.sie_block->hpid = HPID_KVM;
3245 kvm_s390_vcpu_crypto_setup(vcpu);
3247 mutex_lock(&vcpu->kvm->lock);
3248 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3249 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3251 kvm_s390_vcpu_unsetup_cmma(vcpu);
3253 mutex_unlock(&vcpu->kvm->lock);
3258 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3260 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3265 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3267 struct sie_page *sie_page;
3270 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3271 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3275 vcpu->arch.sie_block = &sie_page->sie_block;
3276 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3278 /* the real guest size will always be smaller than msl */
3279 vcpu->arch.sie_block->mso = 0;
3280 vcpu->arch.sie_block->msl = sclp.hamax;
3282 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3283 spin_lock_init(&vcpu->arch.local_int.lock);
3284 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3285 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3286 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3287 seqcount_init(&vcpu->arch.cputm_seqcount);
3289 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3290 kvm_clear_async_pf_completion_queue(vcpu);
3291 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3298 kvm_s390_set_prefix(vcpu, 0);
3299 if (test_kvm_facility(vcpu->kvm, 64))
3300 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3301 if (test_kvm_facility(vcpu->kvm, 82))
3302 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3303 if (test_kvm_facility(vcpu->kvm, 133))
3304 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3305 if (test_kvm_facility(vcpu->kvm, 156))
3306 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3307 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3308 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3311 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3313 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3315 if (kvm_is_ucontrol(vcpu->kvm)) {
3316 rc = __kvm_ucontrol_vcpu_init(vcpu);
3318 goto out_free_sie_block;
3321 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3322 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3323 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3325 rc = kvm_s390_vcpu_setup(vcpu);
3327 goto out_ucontrol_uninit;
3330 out_ucontrol_uninit:
3331 if (kvm_is_ucontrol(vcpu->kvm))
3332 gmap_remove(vcpu->arch.gmap);
3334 free_page((unsigned long)(vcpu->arch.sie_block));
3338 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3340 return kvm_s390_vcpu_has_irq(vcpu, 0);
3343 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3345 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3348 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3350 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3354 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3356 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3359 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3361 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3365 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3367 return atomic_read(&vcpu->arch.sie_block->prog20) &
3368 (PROG_BLOCK_SIE | PROG_REQUEST);
3371 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3373 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3377 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3378 * If the CPU is not running (e.g. waiting as idle) the function will
3379 * return immediately. */
3380 void exit_sie(struct kvm_vcpu *vcpu)
3382 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3383 kvm_s390_vsie_kick(vcpu);
3384 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3388 /* Kick a guest cpu out of SIE to process a request synchronously */
3389 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3391 kvm_make_request(req, vcpu);
3392 kvm_s390_vcpu_request(vcpu);
3395 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3398 struct kvm *kvm = gmap->private;
3399 struct kvm_vcpu *vcpu;
3400 unsigned long prefix;
3403 if (gmap_is_shadow(gmap))
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
3408 kvm_for_each_vcpu(i, vcpu, kvm) {
3409 /* match against both prefix pages */
3410 prefix = kvm_s390_get_prefix(vcpu);
3411 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3412 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3414 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3419 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3421 /* do not poll with more than halt_poll_max_steal percent of steal time */
3422 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3423 halt_poll_max_steal) {
		vcpu->stat.halt_no_poll_steal++;
		return true;
	}
	return false;
}
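/*
 * Worked example for the check above (an explanatory sketch):
 * avg_steal_timer is measured in CPU-timer units, 4096 (1 << 12) per
 * microsecond, and TICK_USEC is the tick length in microseconds, so
 * avg_steal_timer * 100 / (TICK_USEC << 12) is the steal percentage;
 * with halt_poll_max_steal = 10, polling stops once 10% or more of the
 * tick was stolen by the hypervisor.
 */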
3430 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
3437 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3438 struct kvm_one_reg *reg)
3443 case KVM_REG_S390_TODPR:
3444 r = put_user(vcpu->arch.sie_block->todpr,
3445 (u32 __user *)reg->addr);
3447 case KVM_REG_S390_EPOCHDIFF:
3448 r = put_user(vcpu->arch.sie_block->epoch,
3449 (u64 __user *)reg->addr);
3451 case KVM_REG_S390_CPU_TIMER:
3452 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3453 (u64 __user *)reg->addr);
3455 case KVM_REG_S390_CLOCK_COMP:
3456 r = put_user(vcpu->arch.sie_block->ckc,
3457 (u64 __user *)reg->addr);
3459 case KVM_REG_S390_PFTOKEN:
3460 r = put_user(vcpu->arch.pfault_token,
3461 (u64 __user *)reg->addr);
3463 case KVM_REG_S390_PFCOMPARE:
3464 r = put_user(vcpu->arch.pfault_compare,
3465 (u64 __user *)reg->addr);
3467 case KVM_REG_S390_PFSELECT:
3468 r = put_user(vcpu->arch.pfault_select,
3469 (u64 __user *)reg->addr);
3471 case KVM_REG_S390_PP:
3472 r = put_user(vcpu->arch.sie_block->pp,
3473 (u64 __user *)reg->addr);
3475 case KVM_REG_S390_GBEA:
3476 r = put_user(vcpu->arch.sie_block->gbea,
3477 (u64 __user *)reg->addr);
3486 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3487 struct kvm_one_reg *reg)
3493 case KVM_REG_S390_TODPR:
3494 r = get_user(vcpu->arch.sie_block->todpr,
3495 (u32 __user *)reg->addr);
3497 case KVM_REG_S390_EPOCHDIFF:
3498 r = get_user(vcpu->arch.sie_block->epoch,
3499 (u64 __user *)reg->addr);
3501 case KVM_REG_S390_CPU_TIMER:
3502 r = get_user(val, (u64 __user *)reg->addr);
3504 kvm_s390_set_cpu_timer(vcpu, val);
3506 case KVM_REG_S390_CLOCK_COMP:
3507 r = get_user(vcpu->arch.sie_block->ckc,
3508 (u64 __user *)reg->addr);
3510 case KVM_REG_S390_PFTOKEN:
3511 r = get_user(vcpu->arch.pfault_token,
3512 (u64 __user *)reg->addr);
3513 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3514 kvm_clear_async_pf_completion_queue(vcpu);
3516 case KVM_REG_S390_PFCOMPARE:
3517 r = get_user(vcpu->arch.pfault_compare,
3518 (u64 __user *)reg->addr);
3520 case KVM_REG_S390_PFSELECT:
3521 r = get_user(vcpu->arch.pfault_select,
3522 (u64 __user *)reg->addr);
3524 case KVM_REG_S390_PP:
3525 r = get_user(vcpu->arch.sie_block->pp,
3526 (u64 __user *)reg->addr);
3528 case KVM_REG_S390_GBEA:
3529 r = get_user(vcpu->arch.sie_block->gbea,
3530 (u64 __user *)reg->addr);
3539 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3541 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3542 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3543 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3545 kvm_clear_async_pf_completion_queue(vcpu);
3546 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3547 kvm_s390_vcpu_stop(vcpu);
3548 kvm_s390_clear_local_irqs(vcpu);
3551 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3553 /* Initial reset is a superset of the normal reset */
3554 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
	/*
	 * This equals the initial cpu reset in the POP (z/Architecture
	 * Principles of Operation), but we don't switch to ESA.
	 * We do not only reset the internal data, but also ...
	 */
3560 vcpu->arch.sie_block->gpsw.mask = 0;
3561 vcpu->arch.sie_block->gpsw.addr = 0;
3562 kvm_s390_set_prefix(vcpu, 0);
3563 kvm_s390_set_cpu_timer(vcpu, 0);
3564 vcpu->arch.sie_block->ckc = 0;
3565 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3566 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3567 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3569 /* ... the data in sync regs */
3570 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3571 vcpu->run->s.regs.ckc = 0;
3572 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3573 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3574 vcpu->run->psw_addr = 0;
3575 vcpu->run->psw_mask = 0;
3576 vcpu->run->s.regs.todpr = 0;
3577 vcpu->run->s.regs.cputm = 0;
3578 vcpu->run->s.regs.ckc = 0;
3579 vcpu->run->s.regs.pp = 0;
3580 vcpu->run->s.regs.gbea = 1;
3581 vcpu->run->s.regs.fpc = 0;
	/*
	 * Do not reset these registers in the protected case, as some of
	 * them are overlaid and they are not accessible in this case
	 * anyway.
	 */
3587 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3588 vcpu->arch.sie_block->gbea = 1;
3589 vcpu->arch.sie_block->pp = 0;
3590 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3591 vcpu->arch.sie_block->todpr = 0;
3595 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3597 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3599 /* Clear reset is a superset of the initial reset */
3600 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));
3608 regs->etoken_extension = 0;
3611 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3619 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3627 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3628 struct kvm_sregs *sregs)
3632 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3633 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3639 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3640 struct kvm_sregs *sregs)
3644 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3645 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3651 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
3661 vcpu->run->s.regs.fpc = fpu->fpc;
3663 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3664 (freg_t *) fpu->fprs);
3666 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3673 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3677 /* make sure we have the latest values */
3680 convert_vx_to_fp((freg_t *) fpu->fprs,
3681 (__vector128 *) vcpu->run->s.regs.vrs);
3683 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3684 fpu->fpc = vcpu->run->s.regs.fpc;
3690 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3694 if (!is_vcpu_stopped(vcpu))
3697 vcpu->run->psw_mask = psw.mask;
3698 vcpu->run->psw_addr = psw.addr;
3703 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3704 struct kvm_translation *tr)
3706 return -EINVAL; /* not implemented yet */
3709 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3710 KVM_GUESTDBG_USE_HW_BP | \
3711 KVM_GUESTDBG_ENABLE)
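/*
 * Userspace sketch for the ioctl handled below (illustrative only;
 * vcpu_fd is an open VCPU descriptor, error handling omitted):
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */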
3713 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3714 struct kvm_guest_debug *dbg)
3720 vcpu->guest_debug = 0;
3721 kvm_s390_clear_bp_data(vcpu);
	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}
3732 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3733 vcpu->guest_debug = dbg->control;
3734 /* enforce guest PER */
3735 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3737 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3738 rc = kvm_s390_import_bp_data(vcpu, dbg);
3740 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3741 vcpu->arch.guestdbg.last_bp = 0;
3745 vcpu->guest_debug = 0;
3746 kvm_s390_clear_bp_data(vcpu);
3747 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3755 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3756 struct kvm_mp_state *mp_state)
3762 /* CHECK_STOP and LOAD are not supported yet */
3763 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3764 KVM_MP_STATE_OPERATING;
3770 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3771 struct kvm_mp_state *mp_state)
3777 /* user space knows about this interface - let it control the state */
3778 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3780 switch (mp_state->mp_state) {
3781 case KVM_MP_STATE_STOPPED:
3782 rc = kvm_s390_vcpu_stop(vcpu);
3784 case KVM_MP_STATE_OPERATING:
3785 rc = kvm_s390_vcpu_start(vcpu);
3787 case KVM_MP_STATE_LOAD:
3788 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3792 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3794 case KVM_MP_STATE_CHECK_STOP:
3795 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3804 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3806 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3809 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3817 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3818 * This ensures that the ipte instruction for this request has
3819 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
3822 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3824 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3825 kvm_s390_get_prefix(vcpu),
3826 PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}
3834 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3835 vcpu->arch.sie_block->ihcpu = 0xffff;
3839 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3840 if (!ibs_enabled(vcpu)) {
3841 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3842 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3847 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3848 if (ibs_enabled(vcpu)) {
3849 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3850 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3855 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3856 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3860 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionality needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}
3870 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}
3881 /* nothing to do, just clear the request */
3882 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3883 /* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
3889 void kvm_s390_set_tod_clock(struct kvm *kvm,
3890 const struct kvm_s390_vm_tod_clock *gtod)
3892 struct kvm_vcpu *vcpu;
3893 union tod_clock clk;
3896 mutex_lock(&kvm->lock);
3899 store_tod_clock_ext(&clk);
3901 kvm->arch.epoch = gtod->tod - clk.tod;
3903 if (test_kvm_facility(kvm, 139)) {
3904 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3905 if (kvm->arch.epoch > gtod->tod)
3906 kvm->arch.epdx -= 1;
3909 kvm_s390_vcpu_block_all(kvm);
3910 kvm_for_each_vcpu(i, vcpu, kvm) {
3911 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3912 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3915 kvm_s390_vcpu_unblock_all(kvm);
3917 mutex_unlock(&kvm->lock);
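/*
 * An explanatory aside on the epoch arithmetic above: the guest TOD is
 * host TOD + epoch, hence epoch = gtod->tod - clk.tod as an unsigned
 * 64-bit subtraction. With the epoch index extension (facility 139) the
 * TOD is effectively wider, so a borrow out of the low 64 bits - visible
 * as kvm->arch.epoch ending up larger than gtod->tod - must be carried
 * into the epoch index, hence the "epdx -= 1".
 */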
3921 * kvm_arch_fault_in_page - fault-in guest page if necessary
3922 * @vcpu: The corresponding virtual cpu
3923 * @gpa: Guest physical address
3924 * @writable: Whether the page should be writable or not
3926 * Make sure that a guest page has been faulted-in on the host.
3928 * Return: Zero on success, negative error code otherwise.
3930 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3932 return gmap_fault(vcpu->arch.gmap, gpa,
3933 writable ? FAULT_FLAG_WRITE : 0);
3936 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3937 unsigned long token)
3939 struct kvm_s390_interrupt inti;
3940 struct kvm_s390_irq irq;
	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
3953 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3954 struct kvm_async_pf *work)
3956 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3957 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3962 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3963 struct kvm_async_pf *work)
3965 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3966 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3969 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3970 struct kvm_async_pf *work)
3972 /* s390 will always inject the page directly */
3975 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
3984 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
	struct kvm_arch_async_pf arch;
	hva_t hva;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return false;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return false;
	if (psw_extint_disabled(vcpu))
		return false;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return false;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return false;
	if (!vcpu->arch.gmap->pfault_enabled)
		return false;
4003 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4004 hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return false;
4008 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4011 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
4020 kvm_check_async_pf_completion(vcpu);
4022 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4023 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4028 if (!kvm_is_ucontrol(vcpu->kvm)) {
4029 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4034 rc = kvm_s390_handle_requests(vcpu);
4038 if (guestdbg_enabled(vcpu)) {
4039 kvm_s390_backup_guest_per_regs(vcpu);
4040 kvm_s390_patch_guest_per_regs(vcpu);
4043 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4045 vcpu->arch.sie_block->icptcode = 0;
4046 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4047 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4048 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4053 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4055 struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
4072 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4085 kvm_s390_forward_psw(vcpu, ilen);
4086 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4089 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4091 struct mcck_volatile_info *mcck_info;
4092 struct sie_page *sie_page;
4094 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4095 vcpu->arch.sie_block->icptcode);
4096 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4098 if (guestdbg_enabled(vcpu))
4099 kvm_s390_restore_guest_per_regs(vcpu);
4101 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4102 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4104 if (exit_reason == -EINTR) {
4105 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4106 sie_page = container_of(vcpu->arch.sie_block,
4107 struct sie_page, sie_block);
4108 mcck_info = &sie_page->mcck_info;
4109 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4113 if (vcpu->arch.sie_block->icptcode > 0) {
4114 int rc = kvm_handle_sie_intercept(vcpu);
		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
4123 } else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
4126 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4127 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4128 vcpu->run->s390_ucontrol.trans_exc_code =
4129 current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
4132 } else if (current->thread.gmap_pfault) {
4133 trace_kvm_s390_major_guest_pfault(vcpu);
4134 current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
4137 vcpu->stat.pfault_sync++;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
4140 return vcpu_post_run_fault_in_sie(vcpu);
4143 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4144 static int __vcpu_run(struct kvm_vcpu *vcpu)
4146 int rc, exit_reason;
4147 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;
4160 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit there should be no uaccess.
		 */
4165 local_irq_disable();
4166 guest_enter_irqoff();
4167 __disable_cpu_timer_accounting(vcpu);
4169 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4170 memcpy(sie_page->pv_grregs,
4171 vcpu->run->s.regs.gprs,
4172 sizeof(sie_page->pv_grregs));
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
4176 exit_reason = sie64a(vcpu->arch.sie_block,
4177 vcpu->run->s.regs.gprs);
4178 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4179 memcpy(vcpu->run->s.regs.gprs,
4180 sie_page->pv_grregs,
4181 sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
4188 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4189 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4190 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4193 local_irq_disable();
4194 __enable_cpu_timer_accounting(vcpu);
4195 guest_exit_irqoff();
4197 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4199 rc = vcpu_post_run(vcpu, exit_reason);
4200 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

	return rc;
}
4206 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4208 struct kvm_run *kvm_run = vcpu->run;
4209 struct runtime_instr_cb *riccb;
4212 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4213 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4214 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4215 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4216 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4217 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4218 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4219 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4221 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4222 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4223 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4224 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4225 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4226 kvm_clear_async_pf_completion_queue(vcpu);
4228 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4229 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4230 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
4236 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4237 test_kvm_facility(vcpu->kvm, 64) &&
4239 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4240 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4241 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
4247 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4248 test_kvm_facility(vcpu->kvm, 133) &&
4250 !vcpu->arch.gs_enabled) {
4251 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4252 vcpu->arch.sie_block->ecb |= ECB_GS;
4253 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4254 vcpu->arch.gs_enabled = 1;
4256 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4257 test_kvm_facility(vcpu->kvm, 82)) {
4258 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4259 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4261 if (MACHINE_HAS_GS) {
4263 __ctl_set_bit(2, 4);
4264 if (current->thread.gs_cb) {
4265 vcpu->arch.host_gscb = current->thread.gs_cb;
4266 save_gs_cb(vcpu->arch.host_gscb);
4268 if (vcpu->arch.gs_enabled) {
4269 current->thread.gs_cb = (struct gs_cb *)
4270 &vcpu->run->s.regs.gscb;
4271 restore_gs_cb(current->thread.gs_cb);
4275 /* SIE will load etoken directly from SDNX and therefore kvm_run */
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * do only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}
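
/* Mirror of sync_regs_fmt2(): copy format-2 only state back into kvm_run. */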
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
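
/*
 * Copy guest register state back into kvm_run for userspace and restore
 * the host state saved by sync_regs().
 */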
static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}
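
/*
 * Entry point for the KVM_RUN ioctl. A userspace VMM typically drives this
 * in a loop along the lines of the following sketch (illustrative only,
 * error handling omitted, fd and size names made up):
 *
 *	run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   vcpu_fd, 0);
 *	for (;;) {
 *		ioctl(vcpu_fd, KVM_RUN, 0);
 *		... inspect run->exit_reason, e.g. KVM_EXIT_INTR ...
 *	}
 */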
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);
	/*
	 * No need to check the return value of vcpu_start: it can only fail
	 * for protvirt, and protvirt implies user controlled cpu state, so
	 * the start branch is never taken in that case.
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);
	kvm_sigset_deactivate(vcpu);
	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * Store status at address.
 * We have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
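
/* Variant of the above for a VCPU whose state is currently loaded. */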
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
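
/*
 * IBS pays off only while exactly one VCPU is running: the helpers below
 * enable it for a lone runner and revoke it from all VCPUs as soon as a
 * second one comes online.
 */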
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
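
/*
 * Move a stopped VCPU into the operating state. For a protected guest the
 * ultravisor is informed first and may refuse the state change.
 */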
int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
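
/*
 * Move a running VCPU into the stopped state, dropping any pending stop
 * requests and handing IBS to the last remaining runner if only one is
 * left.
 */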
int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
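
/* Per-VCPU capability enablement; only KVM_CAP_S390_CSS_SUPPORT is handled. */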
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
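
/*
 * Access the SIDA (secure instruction data area) of a protected VCPU on
 * behalf of userspace. Offset and size are validated against the SIDA
 * size, including the addition overflow check below.
 */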
static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
				   struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
				 mop->sida_offset), mop->size))
			r = -EFAULT;
		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
				   mop->sida_offset), uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}
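
/*
 * Read or write guest logical memory for userspace, bouncing the data
 * through a temporary kernel buffer. With KVM_S390_MEMOP_F_CHECK_ONLY the
 * access is only checked for validity, not performed.
 */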
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r = 0;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	}

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
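
/* Dispatch a KVM_S390_MEM_OP to the memory or SIDA handler under SRCU. */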
static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
				      struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_guest_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_guest_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}
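
/*
 * Interrupt injection ioctls are allowed to run without taking the vcpu
 * mutex; anything not handled here is reported as -ENOIOCTLCMD so that
 * common code retries it via the locked kvm_arch_vcpu_ioctl() path.
 */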
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
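
/*
 * Main synchronous VCPU ioctl handler. An illustrative userspace call for
 * one of the commands handled below (fd and buffer names made up):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */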
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
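
/*
 * mmap fault handler for the vcpu fd: only user controlled (ucontrol) VMs
 * may map the SIE control block page; everybody else gets SIGBUS.
 */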
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/*
	 * A few sanity checks. Memory slots have to start and end on a
	 * segment boundary (1MB). The memory in userland may be fragmented
	 * into various different vmas, and it is fine to mmap() and munmap()
	 * within this slot at any time after this call.
	 */
	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;
	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				   const struct kvm_userspace_memory_region *mem,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
}
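
/*
 * nonhyp_mask() keeps only the facility bits of word i that are not
 * reserved for the hypervisor: (sclp.hmfai << i * 2) >> 30 extracts a
 * 2-bit count n from sclp.hmfai, and 0x0000ffffffffffffUL >> (n << 4)
 * then keeps 48 - 16 * n bits, i.e. each increment of n hides another 16
 * facility bits and the 16 most significant bits are always masked off.
 */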
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");