1 // SPDX-License-Identifier: GPL-2.0
3 * hosting IBM Z kernel virtual machines (s390x)
5 * Copyright IBM Corp. 2008, 2020
7 * Author(s): Carsten Otte <cotte@de.ibm.com>
8 * Christian Borntraeger <borntraeger@de.ibm.com>
9 * Heiko Carstens <heiko.carstens@de.ibm.com>
10 * Christian Ehrhardt <ehrhardt@de.ibm.com>
11 * Jason J. Herne <jjherne@us.ibm.com>
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17 #include <linux/compiler.h>
18 #include <linux/err.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
41 #include <asm/switch_to.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
48 #include <asm/fpu/api.h>
52 #define CREATE_TRACE_POINTS
54 #include "trace-s390.h"
56 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59 (KVM_MAX_VCPUS + LOCAL_IRQS))
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62 KVM_GENERIC_VM_STATS(),
63 STATS_DESC_COUNTER(VM, inject_io),
64 STATS_DESC_COUNTER(VM, inject_float_mchk),
65 STATS_DESC_COUNTER(VM, inject_pfault_done),
66 STATS_DESC_COUNTER(VM, inject_service_signal),
67 STATS_DESC_COUNTER(VM, inject_virtio)
70 const struct kvm_stats_header kvm_vm_stats_header = {
71 .name_size = KVM_STATS_NAME_SIZE,
72 .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
73 .id_offset = sizeof(struct kvm_stats_header),
74 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
75 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
76 sizeof(kvm_vm_stats_desc),
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
80 KVM_GENERIC_VCPU_STATS(),
81 STATS_DESC_COUNTER(VCPU, exit_userspace),
82 STATS_DESC_COUNTER(VCPU, exit_null),
83 STATS_DESC_COUNTER(VCPU, exit_external_request),
84 STATS_DESC_COUNTER(VCPU, exit_io_request),
85 STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
86 STATS_DESC_COUNTER(VCPU, exit_stop_request),
87 STATS_DESC_COUNTER(VCPU, exit_validity),
88 STATS_DESC_COUNTER(VCPU, exit_instruction),
89 STATS_DESC_COUNTER(VCPU, exit_pei),
90 STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
91 STATS_DESC_COUNTER(VCPU, instruction_lctl),
92 STATS_DESC_COUNTER(VCPU, instruction_lctlg),
93 STATS_DESC_COUNTER(VCPU, instruction_stctl),
94 STATS_DESC_COUNTER(VCPU, instruction_stctg),
95 STATS_DESC_COUNTER(VCPU, exit_program_interruption),
96 STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
97 STATS_DESC_COUNTER(VCPU, exit_operation_exception),
98 STATS_DESC_COUNTER(VCPU, deliver_ckc),
99 STATS_DESC_COUNTER(VCPU, deliver_cputm),
100 STATS_DESC_COUNTER(VCPU, deliver_external_call),
101 STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
102 STATS_DESC_COUNTER(VCPU, deliver_service_signal),
103 STATS_DESC_COUNTER(VCPU, deliver_virtio),
104 STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
105 STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
106 STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
107 STATS_DESC_COUNTER(VCPU, deliver_program),
108 STATS_DESC_COUNTER(VCPU, deliver_io),
109 STATS_DESC_COUNTER(VCPU, deliver_machine_check),
110 STATS_DESC_COUNTER(VCPU, exit_wait_state),
111 STATS_DESC_COUNTER(VCPU, inject_ckc),
112 STATS_DESC_COUNTER(VCPU, inject_cputm),
113 STATS_DESC_COUNTER(VCPU, inject_external_call),
114 STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
115 STATS_DESC_COUNTER(VCPU, inject_mchk),
116 STATS_DESC_COUNTER(VCPU, inject_pfault_init),
117 STATS_DESC_COUNTER(VCPU, inject_program),
118 STATS_DESC_COUNTER(VCPU, inject_restart),
119 STATS_DESC_COUNTER(VCPU, inject_set_prefix),
120 STATS_DESC_COUNTER(VCPU, inject_stop_signal),
121 STATS_DESC_COUNTER(VCPU, instruction_epsw),
122 STATS_DESC_COUNTER(VCPU, instruction_gs),
123 STATS_DESC_COUNTER(VCPU, instruction_io_other),
124 STATS_DESC_COUNTER(VCPU, instruction_lpsw),
125 STATS_DESC_COUNTER(VCPU, instruction_lpswe),
126 STATS_DESC_COUNTER(VCPU, instruction_pfmf),
127 STATS_DESC_COUNTER(VCPU, instruction_ptff),
128 STATS_DESC_COUNTER(VCPU, instruction_sck),
129 STATS_DESC_COUNTER(VCPU, instruction_sckpf),
130 STATS_DESC_COUNTER(VCPU, instruction_stidp),
131 STATS_DESC_COUNTER(VCPU, instruction_spx),
132 STATS_DESC_COUNTER(VCPU, instruction_stpx),
133 STATS_DESC_COUNTER(VCPU, instruction_stap),
134 STATS_DESC_COUNTER(VCPU, instruction_iske),
135 STATS_DESC_COUNTER(VCPU, instruction_ri),
136 STATS_DESC_COUNTER(VCPU, instruction_rrbe),
137 STATS_DESC_COUNTER(VCPU, instruction_sske),
138 STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
139 STATS_DESC_COUNTER(VCPU, instruction_stsi),
140 STATS_DESC_COUNTER(VCPU, instruction_stfl),
141 STATS_DESC_COUNTER(VCPU, instruction_tb),
142 STATS_DESC_COUNTER(VCPU, instruction_tpi),
143 STATS_DESC_COUNTER(VCPU, instruction_tprot),
144 STATS_DESC_COUNTER(VCPU, instruction_tsch),
145 STATS_DESC_COUNTER(VCPU, instruction_sie),
146 STATS_DESC_COUNTER(VCPU, instruction_essa),
147 STATS_DESC_COUNTER(VCPU, instruction_sthyi),
148 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
149 STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
150 STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
151 STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
152 STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
153 STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
154 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
155 STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
156 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
157 STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
158 STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
159 STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
160 STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
161 STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
162 STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
163 STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
164 STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
165 STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
166 STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
167 STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
168 STATS_DESC_COUNTER(VCPU, diag_9c_forward),
169 STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
170 STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
171 STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
172 STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
173 STATS_DESC_COUNTER(VCPU, pfault_sync)
176 const struct kvm_stats_header kvm_vcpu_stats_header = {
177 .name_size = KVM_STATS_NAME_SIZE,
178 .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
179 .id_offset = sizeof(struct kvm_stats_header),
180 .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
181 .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
182 sizeof(kvm_vcpu_stats_desc),
185 /* allow nested virtualization in KVM (if enabled by user space) */
/* read-only for everyone; octal mode spelled like the hpage parameter's */
187 module_param(nested, int, 0444);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
190 /* allow 1m huge page guest backing, if !nested */
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
195 /* maximum percentage of steal time for polling. >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
211 * For now we handle at most 16 double words as this is what the s390 base
212 * kernel handles and stores in the prefix page. If we ever need to go beyond
213 * this, this requires changes to code, but the external uapi can stay.
215 #define SIZE_INTERNAL 16
218 * Base feature mask that defines default mask for facilities. Consists of the
219 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
223 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224 * and defines the facilities that can be enabled via a cpu model.
226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
228 static unsigned long kvm_s390_fac_size(void)
230 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231 BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232 BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233 sizeof(stfle_fac_list));
235 return SIZE_INTERNAL;
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
243 static struct gmap_notifier gmap_notifier;
244 static struct gmap_notifier vsie_gmap_notifier;
245 debug_info_t *kvm_s390_dbf;
246 debug_info_t *kvm_s390_dbf_uv;
248 /* Section: not file related */
249 int kvm_arch_hardware_enable(void)
251 /* every s390 is virtualization enabled ;-) */
255 int kvm_arch_check_processor_compat(void *opaque)
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
263 static int sca_switch_to_extended(struct kvm *kvm);
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
270 * The TOD jumps by delta, we have to compensate this by adding
271 * -delta to the epoch.
275 /* sign-extension - we're adding to signed values below */
280 if (scb->ecd & ECD_MEF) {
281 scb->epdx += delta_idx;
282 if (scb->epoch < delta)
288 * This callback is executed during stop_machine(). All CPUs are therefore
289 * temporarily stopped. In order not to change guest behavior, we have to
290 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291 * so a CPU won't be stopped while calculating with the epoch.
293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
297 struct kvm_vcpu *vcpu;
299 unsigned long long *delta = v;
301 list_for_each_entry(kvm, &vm_list, vm_list) {
302 kvm_for_each_vcpu(i, vcpu, kvm) {
303 kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
305 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
308 if (vcpu->arch.cputm_enabled)
309 vcpu->arch.cputm_start += *delta;
310 if (vcpu->arch.vsie_block)
311 kvm_clock_sync_scb(vcpu->arch.vsie_block,
318 static struct notifier_block kvm_clock_notifier = {
319 .notifier_call = kvm_clock_sync,
322 int kvm_arch_hardware_setup(void *opaque)
324 gmap_notifier.notifier_call = kvm_gmap_notifier;
325 gmap_register_pte_notifier(&gmap_notifier);
326 vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327 gmap_register_pte_notifier(&vsie_gmap_notifier);
328 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329 &kvm_clock_notifier);
333 void kvm_arch_hardware_unsetup(void)
335 gmap_unregister_pte_notifier(&gmap_notifier);
336 gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338 &kvm_clock_notifier);
341 static void allow_cpu_feat(unsigned long nr)
343 set_bit_inv(nr, kvm_s390_available_cpu_feat);
346 static inline int plo_test_bit(unsigned char nr)
348 unsigned long function = (unsigned long)nr | 0x100;
352 " lgr 0,%[function]\n"
353 /* Parameter registers are ignored for "test bit" */
358 : [function] "d" (function)
363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
368 /* Parameter registers are ignored */
369 " .insn rrf,%[opc] << 16,2,4,6,0\n"
371 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
372 : "cc", "memory", "0", "1");
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
378 static void kvm_s390_cpu_feat_init(void)
382 for (i = 0; i < 256; ++i) {
384 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
387 if (test_facility(28)) /* TOD-clock steering */
388 ptff(kvm_s390_available_subfunc.ptff,
389 sizeof(kvm_s390_available_subfunc.ptff),
392 if (test_facility(17)) { /* MSA */
393 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394 kvm_s390_available_subfunc.kmac);
395 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396 kvm_s390_available_subfunc.kmc);
397 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
398 kvm_s390_available_subfunc.km);
399 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400 kvm_s390_available_subfunc.kimd);
401 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402 kvm_s390_available_subfunc.klmd);
404 if (test_facility(76)) /* MSA3 */
405 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406 kvm_s390_available_subfunc.pckmo);
407 if (test_facility(77)) { /* MSA4 */
408 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409 kvm_s390_available_subfunc.kmctr);
410 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411 kvm_s390_available_subfunc.kmf);
412 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413 kvm_s390_available_subfunc.kmo);
414 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415 kvm_s390_available_subfunc.pcc);
417 if (test_facility(57)) /* MSA5 */
418 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419 kvm_s390_available_subfunc.ppno);
421 if (test_facility(146)) /* MSA8 */
422 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423 kvm_s390_available_subfunc.kma);
425 if (test_facility(155)) /* MSA9 */
426 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427 kvm_s390_available_subfunc.kdsa);
429 if (test_facility(150)) /* SORTL */
430 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
432 if (test_facility(151)) /* DFLTCC */
433 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
435 if (MACHINE_HAS_ESOP)
436 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
438 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
441 if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442 !test_facility(3) || !nested)
444 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445 if (sclp.has_64bscao)
446 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
448 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
450 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
452 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
454 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
456 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
458 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
460 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
462 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463 * all skey handling functions read/set the skey from the PGSTE
464 * instead of the real storage key.
466 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
467 * pages being detected as preserved although they are resident.
469 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
472 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
476 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477 * cannot easily shadow the SCA because of the ipte lock.
481 int kvm_arch_init(void *opaque)
485 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
489 kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490 if (!kvm_s390_dbf_uv)
493 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494 debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
497 kvm_s390_cpu_feat_init();
499 /* Register floating interrupt controller interface. */
500 rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
502 pr_err("A FLIC registration call failed with rc=%d\n", rc);
506 rc = kvm_s390_gib_init(GAL_ISC);
517 void kvm_arch_exit(void)
519 kvm_s390_gib_destroy();
520 debug_unregister(kvm_s390_dbf);
521 debug_unregister(kvm_s390_dbf_uv);
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file *filp,
526 unsigned int ioctl, unsigned long arg)
528 if (ioctl == KVM_S390_ENABLE_SIE)
529 return s390_enable_sie();
533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
538 case KVM_CAP_S390_PSW:
539 case KVM_CAP_S390_GMAP:
540 case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542 case KVM_CAP_S390_UCONTROL:
544 case KVM_CAP_ASYNC_PF:
545 case KVM_CAP_SYNC_REGS:
546 case KVM_CAP_ONE_REG:
547 case KVM_CAP_ENABLE_CAP:
548 case KVM_CAP_S390_CSS_SUPPORT:
549 case KVM_CAP_IOEVENTFD:
550 case KVM_CAP_DEVICE_CTRL:
551 case KVM_CAP_S390_IRQCHIP:
552 case KVM_CAP_VM_ATTRIBUTES:
553 case KVM_CAP_MP_STATE:
554 case KVM_CAP_IMMEDIATE_EXIT:
555 case KVM_CAP_S390_INJECT_IRQ:
556 case KVM_CAP_S390_USER_SIGP:
557 case KVM_CAP_S390_USER_STSI:
558 case KVM_CAP_S390_SKEYS:
559 case KVM_CAP_S390_IRQ_STATE:
560 case KVM_CAP_S390_USER_INSTR0:
561 case KVM_CAP_S390_CMMA_MIGRATION:
562 case KVM_CAP_S390_AIS:
563 case KVM_CAP_S390_AIS_MIGRATION:
564 case KVM_CAP_S390_VCPU_RESETS:
565 case KVM_CAP_SET_GUEST_DEBUG:
566 case KVM_CAP_S390_DIAG318:
569 case KVM_CAP_SET_GUEST_DEBUG2:
570 r = KVM_GUESTDBG_VALID_MASK;
572 case KVM_CAP_S390_HPAGE_1M:
574 if (hpage && !kvm_is_ucontrol(kvm))
577 case KVM_CAP_S390_MEM_OP:
580 case KVM_CAP_NR_VCPUS:
581 case KVM_CAP_MAX_VCPUS:
582 case KVM_CAP_MAX_VCPU_ID:
583 r = KVM_S390_BSCA_CPU_SLOTS;
584 if (!kvm_s390_use_sca_entries())
586 else if (sclp.has_esca && sclp.has_64bscao)
587 r = KVM_S390_ESCA_CPU_SLOTS;
588 if (ext == KVM_CAP_NR_VCPUS)
589 r = min_t(unsigned int, num_online_cpus(), r);
591 case KVM_CAP_S390_COW:
592 r = MACHINE_HAS_ESOP;
594 case KVM_CAP_S390_VECTOR_REGISTERS:
597 case KVM_CAP_S390_RI:
598 r = test_facility(64);
600 case KVM_CAP_S390_GS:
601 r = test_facility(133);
603 case KVM_CAP_S390_BPB:
604 r = test_facility(82);
606 case KVM_CAP_S390_PROTECTED:
607 r = is_prot_virt_host();
615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
618 gfn_t cur_gfn, last_gfn;
619 unsigned long gaddr, vmaddr;
620 struct gmap *gmap = kvm->arch.gmap;
621 DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
623 /* Loop over all guest segments */
624 cur_gfn = memslot->base_gfn;
625 last_gfn = memslot->base_gfn + memslot->npages;
626 for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
627 gaddr = gfn_to_gpa(cur_gfn);
628 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
629 if (kvm_is_error_hva(vmaddr))
632 bitmap_zero(bitmap, _PAGE_ENTRIES);
633 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
634 for (i = 0; i < _PAGE_ENTRIES; i++) {
635 if (test_bit(i, bitmap))
636 mark_page_dirty(kvm, cur_gfn + i);
639 if (fatal_signal_pending(current))
645 /* Section: vm related */
646 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
649 * Get (and clear) the dirty memory log for a memory slot.
651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
652 struct kvm_dirty_log *log)
656 struct kvm_memory_slot *memslot;
659 if (kvm_is_ucontrol(kvm))
662 mutex_lock(&kvm->slots_lock);
665 if (log->slot >= KVM_USER_MEM_SLOTS)
668 r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
672 /* Clear the dirty log */
674 n = kvm_dirty_bitmap_bytes(memslot);
675 memset(memslot->dirty_bitmap, 0, n);
679 mutex_unlock(&kvm->slots_lock);
683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
686 struct kvm_vcpu *vcpu;
688 kvm_for_each_vcpu(i, vcpu, kvm) {
689 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
701 case KVM_CAP_S390_IRQCHIP:
702 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
703 kvm->arch.use_irqchip = 1;
706 case KVM_CAP_S390_USER_SIGP:
707 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
708 kvm->arch.user_sigp = 1;
711 case KVM_CAP_S390_VECTOR_REGISTERS:
712 mutex_lock(&kvm->lock);
713 if (kvm->created_vcpus) {
715 } else if (MACHINE_HAS_VX) {
716 set_kvm_facility(kvm->arch.model.fac_mask, 129);
717 set_kvm_facility(kvm->arch.model.fac_list, 129);
718 if (test_facility(134)) {
719 set_kvm_facility(kvm->arch.model.fac_mask, 134);
720 set_kvm_facility(kvm->arch.model.fac_list, 134);
722 if (test_facility(135)) {
723 set_kvm_facility(kvm->arch.model.fac_mask, 135);
724 set_kvm_facility(kvm->arch.model.fac_list, 135);
726 if (test_facility(148)) {
727 set_kvm_facility(kvm->arch.model.fac_mask, 148);
728 set_kvm_facility(kvm->arch.model.fac_list, 148);
730 if (test_facility(152)) {
731 set_kvm_facility(kvm->arch.model.fac_mask, 152);
732 set_kvm_facility(kvm->arch.model.fac_list, 152);
734 if (test_facility(192)) {
735 set_kvm_facility(kvm->arch.model.fac_mask, 192);
736 set_kvm_facility(kvm->arch.model.fac_list, 192);
741 mutex_unlock(&kvm->lock);
742 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
743 r ? "(not available)" : "(success)");
745 case KVM_CAP_S390_RI:
747 mutex_lock(&kvm->lock);
748 if (kvm->created_vcpus) {
750 } else if (test_facility(64)) {
751 set_kvm_facility(kvm->arch.model.fac_mask, 64);
752 set_kvm_facility(kvm->arch.model.fac_list, 64);
755 mutex_unlock(&kvm->lock);
756 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
757 r ? "(not available)" : "(success)");
759 case KVM_CAP_S390_AIS:
760 mutex_lock(&kvm->lock);
761 if (kvm->created_vcpus) {
764 set_kvm_facility(kvm->arch.model.fac_mask, 72);
765 set_kvm_facility(kvm->arch.model.fac_list, 72);
768 mutex_unlock(&kvm->lock);
769 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
770 r ? "(not available)" : "(success)");
772 case KVM_CAP_S390_GS:
774 mutex_lock(&kvm->lock);
775 if (kvm->created_vcpus) {
777 } else if (test_facility(133)) {
778 set_kvm_facility(kvm->arch.model.fac_mask, 133);
779 set_kvm_facility(kvm->arch.model.fac_list, 133);
782 mutex_unlock(&kvm->lock);
783 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
784 r ? "(not available)" : "(success)");
786 case KVM_CAP_S390_HPAGE_1M:
787 mutex_lock(&kvm->lock);
788 if (kvm->created_vcpus)
790 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
794 mmap_write_lock(kvm->mm);
795 kvm->mm->context.allow_gmap_hpage_1m = 1;
796 mmap_write_unlock(kvm->mm);
798 * We might have to create fake 4k page
799 * tables. To avoid that the hardware works on
800 * stale PGSTEs, we emulate these instructions.
802 kvm->arch.use_skf = 0;
803 kvm->arch.use_pfmfi = 0;
805 mutex_unlock(&kvm->lock);
806 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
807 r ? "(not available)" : "(success)");
809 case KVM_CAP_S390_USER_STSI:
810 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
811 kvm->arch.user_stsi = 1;
814 case KVM_CAP_S390_USER_INSTR0:
815 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
816 kvm->arch.user_instr0 = 1;
817 icpt_operexc_on_all_vcpus(kvm);
827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
831 switch (attr->attr) {
832 case KVM_S390_VM_MEM_LIMIT_SIZE:
834 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
835 kvm->arch.mem_limit);
836 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
850 switch (attr->attr) {
851 case KVM_S390_VM_MEM_ENABLE_CMMA:
856 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
857 mutex_lock(&kvm->lock);
858 if (kvm->created_vcpus)
860 else if (kvm->mm->context.allow_gmap_hpage_1m)
863 kvm->arch.use_cmma = 1;
864 /* Not compatible with cmma. */
865 kvm->arch.use_pfmfi = 0;
868 mutex_unlock(&kvm->lock);
870 case KVM_S390_VM_MEM_CLR_CMMA:
875 if (!kvm->arch.use_cmma)
878 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
879 mutex_lock(&kvm->lock);
880 idx = srcu_read_lock(&kvm->srcu);
881 s390_reset_cmma(kvm->arch.gmap->mm);
882 srcu_read_unlock(&kvm->srcu, idx);
883 mutex_unlock(&kvm->lock);
886 case KVM_S390_VM_MEM_LIMIT_SIZE: {
887 unsigned long new_limit;
889 if (kvm_is_ucontrol(kvm))
892 if (get_user(new_limit, (u64 __user *)attr->addr))
895 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
896 new_limit > kvm->arch.mem_limit)
902 /* gmap_create takes last usable address */
903 if (new_limit != KVM_S390_NO_MEM_LIMIT)
907 mutex_lock(&kvm->lock);
908 if (!kvm->created_vcpus) {
909 /* gmap_create will round the limit up */
910 struct gmap *new = gmap_create(current->mm, new_limit);
915 gmap_remove(kvm->arch.gmap);
917 kvm->arch.gmap = new;
921 mutex_unlock(&kvm->lock);
922 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
923 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
924 (void *) kvm->arch.gmap->asce);
934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
938 struct kvm_vcpu *vcpu;
941 kvm_s390_vcpu_block_all(kvm);
943 kvm_for_each_vcpu(i, vcpu, kvm) {
944 kvm_s390_vcpu_crypto_setup(vcpu);
945 /* recreate the shadow crycb by leaving the VSIE handler */
946 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
949 kvm_s390_vcpu_unblock_all(kvm);
952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
954 mutex_lock(&kvm->lock);
955 switch (attr->attr) {
956 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
957 if (!test_kvm_facility(kvm, 76)) {
958 mutex_unlock(&kvm->lock);
962 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
963 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964 kvm->arch.crypto.aes_kw = 1;
965 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
967 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
968 if (!test_kvm_facility(kvm, 76)) {
969 mutex_unlock(&kvm->lock);
973 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
974 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
975 kvm->arch.crypto.dea_kw = 1;
976 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
978 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
979 if (!test_kvm_facility(kvm, 76)) {
980 mutex_unlock(&kvm->lock);
983 kvm->arch.crypto.aes_kw = 0;
984 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
985 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
986 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
988 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
989 if (!test_kvm_facility(kvm, 76)) {
990 mutex_unlock(&kvm->lock);
993 kvm->arch.crypto.dea_kw = 0;
994 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
995 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
996 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
998 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
999 if (!ap_instructions_available()) {
1000 mutex_unlock(&kvm->lock);
1003 kvm->arch.crypto.apie = 1;
1005 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006 if (!ap_instructions_available()) {
1007 mutex_unlock(&kvm->lock);
1010 kvm->arch.crypto.apie = 0;
1013 mutex_unlock(&kvm->lock);
1017 kvm_s390_vcpu_crypto_reset_all(kvm);
1018 mutex_unlock(&kvm->lock);
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1025 struct kvm_vcpu *vcpu;
1027 kvm_for_each_vcpu(cx, vcpu, kvm)
1028 kvm_s390_sync_request(req, vcpu);
1032 * Must be called with kvm->srcu held to avoid races on memslots, and with
1033 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1037 struct kvm_memory_slot *ms;
1038 struct kvm_memslots *slots;
1039 unsigned long ram_pages = 0;
1042 /* migration mode already enabled */
1043 if (kvm->arch.migration_mode)
1045 slots = kvm_memslots(kvm);
1046 if (!slots || kvm_memslots_empty(slots))
1049 if (!kvm->arch.use_cmma) {
1050 kvm->arch.migration_mode = 1;
1053 /* mark all the pages in active slots as dirty */
1054 kvm_for_each_memslot(ms, bkt, slots) {
1055 if (!ms->dirty_bitmap)
1058 * The second half of the bitmap is only used on x86,
1059 * and would be wasted otherwise, so we put it to good
1060 * use here to keep track of the state of the storage
1063 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064 ram_pages += ms->npages;
1066 atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067 kvm->arch.migration_mode = 1;
1068 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1073 * Must be called with kvm->slots_lock to avoid races with ourselves and
1074 * kvm_s390_vm_start_migration.
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1078 /* migration mode already disabled */
1079 if (!kvm->arch.migration_mode)
1081 kvm->arch.migration_mode = 0;
1082 if (kvm->arch.use_cmma)
1083 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088 struct kvm_device_attr *attr)
1092 mutex_lock(&kvm->slots_lock);
1093 switch (attr->attr) {
1094 case KVM_S390_VM_MIGRATION_START:
1095 res = kvm_s390_vm_start_migration(kvm);
1097 case KVM_S390_VM_MIGRATION_STOP:
1098 res = kvm_s390_vm_stop_migration(kvm);
1103 mutex_unlock(&kvm->slots_lock);
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109 struct kvm_device_attr *attr)
1111 u64 mig = kvm->arch.migration_mode;
1113 if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1116 if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
/*
 * Set the guest TOD clock from a struct kvm_s390_vm_tod_clock that user
 * space supplies at attr->addr: the structure is copied in, a non-zero
 * epoch index is checked against guest facility 139, and the value is
 * passed to kvm_s390_set_tod_clock() and logged.
 */
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1123 struct kvm_s390_vm_tod_clock gtod;
1125 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1128 if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1130 kvm_s390_set_tod_clock(kvm, &gtod);
1132 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133 gtod.epoch_idx, gtod.tod);
/*
 * Handle KVM_S390_VM_TOD_HIGH on the set path: the value is read from
 * user space (attr->addr) into gtod_high and then logged.
 */
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1142 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1148 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
/*
 * Set only the TOD base: gtod starts zeroed, so the epoch index stays 0
 * while the tod field is filled from user space (attr->addr) and handed
 * to kvm_s390_set_tod_clock().
 */
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1155 struct kvm_s390_vm_tod_clock gtod = { 0 };
1157 if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1161 kvm_s390_set_tod_clock(kvm, &gtod);
1162 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1173 switch (attr->attr) {
1174 case KVM_S390_VM_TOD_EXT:
1175 ret = kvm_s390_set_tod_ext(kvm, attr);
1177 case KVM_S390_VM_TOD_HIGH:
1178 ret = kvm_s390_set_tod_high(kvm, attr);
1180 case KVM_S390_VM_TOD_LOW:
1181 ret = kvm_s390_set_tod_low(kvm, attr);
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191 struct kvm_s390_vm_tod_clock *gtod)
1193 union tod_clock clk;
1197 store_tod_clock_ext(&clk);
1199 gtod->tod = clk.tod + kvm->arch.epoch;
1200 gtod->epoch_idx = 0;
1201 if (test_kvm_facility(kvm, 139)) {
1202 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203 if (gtod->tod < clk.tod)
1204 gtod->epoch_idx += 1;
/*
 * Report the guest TOD clock (epoch index and TOD base) to user space.
 * gtod is zeroed before it is filled so that the whole structure copied
 * out to attr->addr has defined contents.
 */
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1212 struct kvm_s390_vm_tod_clock gtod;
1214 memset(&gtod, 0, sizeof(gtod));
1215 kvm_s390_get_tod_clock(kvm, &gtod);
1216 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1219 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220 gtod.epoch_idx, gtod.tod);
/*
 * Handle KVM_S390_VM_TOD_HIGH on the get path: gtod_high is copied out
 * to user space (attr->addr) and then logged.
 */
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1228 if (copy_to_user((void __user *)attr->addr, &gtod_high,
1231 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
/*
 * Report the guest TOD base: the value returned by
 * kvm_s390_get_tod_clock_fast() is copied out to user space (attr->addr)
 * and then logged.
 */
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1240 gtod = kvm_s390_get_tod_clock_fast(kvm);
1241 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1243 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
/*
 * kvm_s390_get_tod() - dispatch a read of the KVM_S390_VM_TOD group.
 *
 * Selects the getter by attr->attr (extended, high or low part of the
 * TOD) and stores that getter's result in ret.
 */
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1255 switch (attr->attr) {
1256 case KVM_S390_VM_TOD_EXT:
1257 ret = kvm_s390_get_tod_ext(kvm, attr);
1259 case KVM_S390_VM_TOD_HIGH:
1260 ret = kvm_s390_get_tod_high(kvm, attr);
1262 case KVM_S390_VM_TOD_LOW:
1263 ret = kvm_s390_get_tod_low(kvm, attr);
/*
 * kvm_s390_set_processor() - install a user-supplied processor model.
 *
 * Runs under kvm->lock and looks at kvm->created_vcpus before doing any
 * work. A kvm_s390_vm_cpu_processor is allocated and filled from
 * attr->addr; when that copy succeeds, its cpuid, IBC value and facility
 * list are written to kvm->arch.model and traced.
 */
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1274 struct kvm_s390_vm_cpu_processor *proc;
1275 u16 lowest_ibc, unblocked_ibc;
1278 mutex_lock(&kvm->lock);
/* NOTE(review): presumably refuses changes once vCPUs exist - confirm. */
1279 if (kvm->created_vcpus) {
1283 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
/* copy_from_user() returns 0 on success, so this is the success branch. */
1288 if (!copy_from_user(proc, (void __user *)attr->addr,
1290 kvm->arch.model.cpuid = proc->cpuid;
/* sclp.ibc packs two 12-bit values: one at bit 16 and one at bit 0. */
1291 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292 unblocked_ibc = sclp.ibc & 0xfff;
/*
 * Only act when both the machine's lowest IBC and the requested IBC are
 * non-zero; requests above unblocked_ibc or below lowest_ibc are replaced
 * by the respective bound.
 */
1293 if (lowest_ibc && proc->ibc) {
1294 if (proc->ibc > unblocked_ibc)
1295 kvm->arch.model.ibc = unblocked_ibc;
1296 else if (proc->ibc < lowest_ibc)
1297 kvm->arch.model.ibc = lowest_ibc;
1299 kvm->arch.model.ibc = proc->ibc;
1301 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302 S390_ARCH_FAC_LIST_SIZE_BYTE);
1303 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304 kvm->arch.model.ibc,
1305 kvm->arch.model.cpuid);
/* Only the first three 64-bit words of the facility list are traced. */
1306 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307 kvm->arch.model.fac_list[0],
1308 kvm->arch.model.fac_list[1],
1309 kvm->arch.model.fac_list[2]);
1314 mutex_unlock(&kvm->lock);
/*
 * kvm_s390_set_processor_feat() - set the guest CPU feature bitmap.
 *
 * The requested bitmap is copied from attr->addr and must be a subset of
 * kvm_s390_available_cpu_feat. Under kvm->lock, and only on the path
 * where kvm->created_vcpus does not trigger the early unlock, the bitmap
 * is copied into kvm->arch.cpu_feat.
 */
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319 struct kvm_device_attr *attr)
1321 struct kvm_s390_vm_cpu_feat data;
1323 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
/* Every requested bit must also be set in the host's available set. */
1325 if (!bitmap_subset((unsigned long *) data.feat,
1326 kvm_s390_available_cpu_feat,
1327 KVM_S390_VM_CPU_FEAT_NR_BITS))
1330 mutex_lock(&kvm->lock);
/* NOTE(review): presumably bails out once vCPUs exist - confirm. */
1331 if (kvm->created_vcpus) {
1332 mutex_unlock(&kvm->lock);
1335 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1336 KVM_S390_VM_CPU_FEAT_NR_BITS);
1337 mutex_unlock(&kvm->lock);
1338 VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * kvm_s390_set_processor_subfunc() - set the guest subfunction blocks.
 *
 * Under kvm->lock, a struct kvm_s390_vm_cpu_subfunc is copied from
 * attr->addr straight into kvm->arch.model.subfuncs; both the
 * created_vcpus branch and the failed-copy branch drop the lock early.
 * After the lock is released each subfunction block is traced as a
 * sequence of unsigned long words.
 */
1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1346 struct kvm_device_attr *attr)
1348 mutex_lock(&kvm->lock);
/* NOTE(review): presumably bails out once vCPUs exist - confirm. */
1349 if (kvm->created_vcpus) {
1350 mutex_unlock(&kvm->lock);
/* The copy targets the live model directly; there is no staging buffer. */
1354 if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1355 sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1356 mutex_unlock(&kvm->lock);
1359 mutex_unlock(&kvm->lock);
/* Trace only: PLO, SORTL and DFLTCC print four words, the rest two. */
1361 VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1363 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1364 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1365 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1366 VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1367 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1368 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1369 VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1370 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1371 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1372 VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1373 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1374 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1375 VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
1376 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1377 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1378 VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1379 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1380 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1381 VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1382 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1383 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1384 VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1385 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1386 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1387 VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1388 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1389 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1390 VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1391 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1392 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1393 VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1394 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1395 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1396 VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1397 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1398 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1399 VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1400 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1401 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1402 VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1403 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1404 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1405 VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1406 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1407 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1408 VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1410 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1411 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1412 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1413 VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1415 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1416 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1417 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
/*
 * kvm_s390_set_cpu_model() - dispatch a write to the CPU model group.
 *
 * attr->attr selects between the processor description, the processor
 * feature bitmap and the processor subfunction blocks.
 */
1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1426 switch (attr->attr) {
1427 case KVM_S390_VM_CPU_PROCESSOR:
1428 ret = kvm_s390_set_processor(kvm, attr);
1430 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1431 ret = kvm_s390_set_processor_feat(kvm, attr);
1433 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1434 ret = kvm_s390_set_processor_subfunc(kvm, attr);
/*
 * kvm_s390_get_processor() - report the configured guest processor model.
 *
 * Fills a zero-allocated kvm_s390_vm_cpu_processor with the cpuid, IBC
 * and facility list from kvm->arch.model, traces them and copies the
 * structure to the user buffer at attr->addr.
 */
1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1442 struct kvm_s390_vm_cpu_processor *proc;
1445 proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1450 proc->cpuid = kvm->arch.model.cpuid;
1451 proc->ibc = kvm->arch.model.ibc;
1452 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1453 S390_ARCH_FAC_LIST_SIZE_BYTE);
1454 VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455 kvm->arch.model.ibc,
1456 kvm->arch.model.cpuid);
/* Only the first three 64-bit words of the facility list are traced. */
1457 VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458 kvm->arch.model.fac_list[0],
1459 kvm->arch.model.fac_list[1],
1460 kvm->arch.model.fac_list[2]);
1461 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
/*
 * kvm_s390_get_machine() - report the host machine model.
 *
 * Fills a zero-allocated kvm_s390_vm_cpu_machine with the host CPU id
 * (get_cpu_id()), sclp.ibc, the VM's facility mask and the host facility
 * list (stfle_fac_list), then copies it to the user buffer at attr->addr.
 */
1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1470 struct kvm_s390_vm_cpu_machine *mach;
1473 mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1478 get_cpu_id((struct cpuid *) &mach->cpuid);
1479 mach->ibc = sclp.ibc;
1480 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1481 S390_ARCH_FAC_LIST_SIZE_BYTE);
/* Copy length is the size of the host list, not of mach->fac_list. */
1482 memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1483 sizeof(stfle_fac_list));
/*
 * NOTE(review): the message says "host" but the arguments are the VM's
 * model values (kvm->arch.model), not mach->ibc / mach->cpuid filled in
 * above - confirm whether that is intended.
 */
1484 VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
1485 kvm->arch.model.ibc,
1486 kvm->arch.model.cpuid);
1487 VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
1491 VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1495 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
/*
 * kvm_s390_get_processor_feat() - report the guest CPU feature bitmap.
 *
 * Copies kvm->arch.cpu_feat into a local kvm_s390_vm_cpu_feat, writes it
 * to the user buffer at attr->addr and traces it.
 */
1502 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1503 struct kvm_device_attr *attr)
1505 struct kvm_s390_vm_cpu_feat data;
1507 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1508 KVM_S390_VM_CPU_FEAT_NR_BITS);
1509 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1511 VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * kvm_s390_get_machine_feat() - report the host CPU feature bitmap.
 *
 * Copies kvm_s390_available_cpu_feat into a local kvm_s390_vm_cpu_feat,
 * writes it to the user buffer at attr->addr and traces it.
 */
1518 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1519 struct kvm_device_attr *attr)
1521 struct kvm_s390_vm_cpu_feat data;
1523 bitmap_copy((unsigned long *) data.feat,
1524 kvm_s390_available_cpu_feat,
1525 KVM_S390_VM_CPU_FEAT_NR_BITS);
1526 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1528 VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
/*
 * kvm_s390_get_processor_subfunc() - report the guest subfunction blocks.
 *
 * Copies kvm->arch.model.subfuncs to the user buffer at attr->addr and
 * then traces each subfunction block as unsigned long words.
 */
1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1536 struct kvm_device_attr *attr)
1538 if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1539 sizeof(struct kvm_s390_vm_cpu_subfunc)))
/* Trace only: PLO, SORTL and DFLTCC print four words, the rest two. */
1542 VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1544 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1545 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1546 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1547 VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
1548 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1549 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1550 VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
1551 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1552 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1553 VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
1554 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1555 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1556 VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
1557 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1558 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1559 VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
1560 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1561 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1562 VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
1563 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1564 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1565 VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
1566 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1567 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1568 VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
1569 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1570 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1571 VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
1572 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1573 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1574 VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
1575 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1576 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1577 VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
1578 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1579 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1580 VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
1581 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1582 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1583 VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
1584 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1585 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1586 VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
1587 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1588 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1589 VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1591 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1592 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1593 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1594 VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1596 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1597 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1598 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
/*
 * kvm_s390_get_machine_subfunc() - report the host subfunction blocks.
 *
 * Copies the global kvm_s390_available_subfunc to the user buffer at
 * attr->addr and then traces each subfunction block as unsigned long
 * words.
 */
1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1604 struct kvm_device_attr *attr)
1606 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1607 sizeof(struct kvm_s390_vm_cpu_subfunc)))
/* Trace only: PLO, SORTL and DFLTCC print four words, the rest two. */
1610 VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1612 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1613 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1614 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1615 VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
1616 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1617 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1618 VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
1619 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1620 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1621 VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
1622 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1623 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1624 VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
1625 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1626 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1627 VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
1628 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1629 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1630 VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
1631 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1632 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1633 VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
1634 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1635 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1636 VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
1637 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1638 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1639 VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
1640 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1641 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1642 VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
1643 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1644 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1645 VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
1646 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1647 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1648 VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
1649 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1650 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1651 VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
1652 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1653 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1654 VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
1655 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1656 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1657 VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1659 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1660 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1661 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1662 VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1664 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1665 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1666 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
/*
 * kvm_s390_get_cpu_model() - dispatch a read of the CPU model group.
 *
 * attr->attr selects one of six getters: processor or machine
 * description, processor or machine feature bitmap, processor or machine
 * subfunction blocks.
 */
1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1675 switch (attr->attr) {
1676 case KVM_S390_VM_CPU_PROCESSOR:
1677 ret = kvm_s390_get_processor(kvm, attr);
1679 case KVM_S390_VM_CPU_MACHINE:
1680 ret = kvm_s390_get_machine(kvm, attr);
1682 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1683 ret = kvm_s390_get_processor_feat(kvm, attr);
1685 case KVM_S390_VM_CPU_MACHINE_FEAT:
1686 ret = kvm_s390_get_machine_feat(kvm, attr);
1688 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1689 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1691 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1692 ret = kvm_s390_get_machine_subfunc(kvm, attr);
/*
 * kvm_s390_vm_set_attr() - top-level dispatcher for VM attribute writes.
 *
 * Routes the request by attr->group to the memory-control, TOD, CPU
 * model, crypto or migration setter.
 */
1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1702 switch (attr->group) {
1703 case KVM_S390_VM_MEM_CTRL:
1704 ret = kvm_s390_set_mem_control(kvm, attr);
1706 case KVM_S390_VM_TOD:
1707 ret = kvm_s390_set_tod(kvm, attr);
1709 case KVM_S390_VM_CPU_MODEL:
1710 ret = kvm_s390_set_cpu_model(kvm, attr);
1712 case KVM_S390_VM_CRYPTO:
1713 ret = kvm_s390_vm_set_crypto(kvm, attr);
1715 case KVM_S390_VM_MIGRATION:
1716 ret = kvm_s390_vm_set_migration(kvm, attr);
/*
 * kvm_s390_vm_get_attr() - top-level dispatcher for VM attribute reads.
 *
 * Routes the request by attr->group to the memory-control, TOD, CPU
 * model or migration getter. No crypto case is visible here, unlike in
 * the set path.
 */
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1730 switch (attr->group) {
1731 case KVM_S390_VM_MEM_CTRL:
1732 ret = kvm_s390_get_mem_control(kvm, attr);
1734 case KVM_S390_VM_TOD:
1735 ret = kvm_s390_get_tod(kvm, attr);
1737 case KVM_S390_VM_CPU_MODEL:
1738 ret = kvm_s390_get_cpu_model(kvm, attr);
1740 case KVM_S390_VM_MIGRATION:
1741 ret = kvm_s390_vm_get_migration(kvm, attr);
/*
 * kvm_s390_vm_has_attr() - tell user space whether a VM attribute exists.
 *
 * Two-level switch: attr->group first, then attr->attr within the group.
 * Some answers depend on the host: the CMMA attributes on sclp.has_cmma
 * and the APIE attributes on ap_instructions_available().
 */
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1755 switch (attr->group) {
1756 case KVM_S390_VM_MEM_CTRL:
1757 switch (attr->attr) {
1758 case KVM_S390_VM_MEM_ENABLE_CMMA:
1759 case KVM_S390_VM_MEM_CLR_CMMA:
/* Both CMMA attributes are reported only if the host has CMMA. */
1760 ret = sclp.has_cmma ? 0 : -ENXIO;
1762 case KVM_S390_VM_MEM_LIMIT_SIZE:
1770 case KVM_S390_VM_TOD:
1771 switch (attr->attr) {
1772 case KVM_S390_VM_TOD_LOW:
1773 case KVM_S390_VM_TOD_HIGH:
1781 case KVM_S390_VM_CPU_MODEL:
1782 switch (attr->attr) {
1783 case KVM_S390_VM_CPU_PROCESSOR:
1784 case KVM_S390_VM_CPU_MACHINE:
1785 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786 case KVM_S390_VM_CPU_MACHINE_FEAT:
1787 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1796 case KVM_S390_VM_CRYPTO:
1797 switch (attr->attr) {
1798 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1804 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
/* APIE control is reported only if the host has AP instructions. */
1806 ret = ap_instructions_available() ? 0 : -ENXIO;
1813 case KVM_S390_VM_MIGRATION:
/*
 * kvm_s390_get_skeys() - read guest storage keys into a user buffer.
 *
 * For args->count guest frames starting at args->start_gfn, each frame's
 * storage key is fetched into a temporary array, which is afterwards
 * copied to the user address in args->skeydata_addr. Returns
 * KVM_S390_GET_SKEYS_NONE early when the calling mm does not use storage
 * keys.
 */
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1828 int srcu_idx, i, r = 0;
1830 if (args->flags != 0)
1833 /* Is this guest using storage keys? */
1834 if (!mm_uses_skeys(current->mm))
1835 return KVM_S390_GET_SKEYS_NONE;
1837 /* Enforce sane limit on memory allocation */
1838 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
/* One byte per requested frame. */
1841 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
/* Hold the mmap lock for reading and the KVM SRCU lock over the lookups. */
1845 mmap_read_lock(current->mm);
1846 srcu_idx = srcu_read_lock(&kvm->srcu);
1847 for (i = 0; i < args->count; i++) {
/* Translate the guest frame number to a host virtual address. */
1848 hva = gfn_to_hva(kvm, args->start_gfn + i);
1849 if (kvm_is_error_hva(hva)) {
1854 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1858 srcu_read_unlock(&kvm->srcu, srcu_idx);
1859 mmap_read_unlock(current->mm);
/* The user copy happens after both locks have been dropped. */
1862 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863 sizeof(uint8_t) * args->count);
/*
 * kvm_s390_set_skeys() - write guest storage keys from a user buffer.
 *
 * Copies args->count key bytes from args->skeydata_addr, enables storage
 * key handling through s390_enable_skey() and then sets the key of each
 * guest frame starting at args->start_gfn. A key with its lowest bit set
 * is treated specially (that bit is reserved).
 */
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1876 int srcu_idx, i, r = 0;
1879 if (args->flags != 0)
1882 /* Enforce sane limit on memory allocation */
1883 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
/* One byte per frame to be updated. */
1886 keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1890 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891 sizeof(uint8_t) * args->count);
1897 /* Enable storage key handling for the guest */
1898 r = s390_enable_skey();
/* Hold the mmap lock for reading and the KVM SRCU lock over the updates. */
1903 mmap_read_lock(current->mm);
1904 srcu_idx = srcu_read_lock(&kvm->srcu);
1905 while (i < args->count) {
1907 hva = gfn_to_hva(kvm, args->start_gfn + i);
1908 if (kvm_is_error_hva(hva)) {
1913 /* Lowest order bit is reserved */
1914 if (keys[i] & 0x01) {
1919 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
/*
 * NOTE(review): presumably reached when setting the key failed, to fault
 * the page in writably before trying again - confirm against the full
 * source.
 */
1921 r = fixup_user_fault(current->mm, hva,
1922 FAULT_FLAG_WRITE, &unlocked);
1929 srcu_read_unlock(&kvm->srcu, srcu_idx);
1930 mmap_read_unlock(current->mm);
/*
 * Tuning constants for the CMMA dirty-log interface.
 * KVM_S390_MAX_BIT_DISTANCE is the size of two pointers;
 * kvm_s390_get_cmma() stops collecting once the next dirty page is
 * further away than this. KVM_S390_CMMA_SIZE_MAX reuses the storage-key
 * limit, cast to u32.
 */
1937 * Base address and length must be sent at the start of each block, therefore
1938 * it's cheaper to send some clean data, as long as it's less than the size of
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1942 /* for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
 * kvm_s390_peek_cmma() - read CMMA values starting at args->start_gfn.
 *
 * Appends one byte per page to res and counts them in args->count until
 * bufsize entries have been collected or a lookup fails. No dirty-bitmap
 * access is visible in this function.
 */
1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1946 u8 *res, unsigned long bufsize)
1948 unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1951 while (args->count < bufsize) {
1952 hva = gfn_to_hva(kvm, cur_gfn);
1954 * We return an error if the first value was invalid, but we
1955 * return successfully if at least one value was copied.
1957 if (kvm_is_error_hva(hva))
1958 return args->count ? 0 : -EFAULT;
1959 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
/* Keep only the bits selected by 0x43 after shifting the PGSTE by 24. */
1961 res[args->count++] = (pgstev >> 24) & 0x43;
/*
 * gfn_to_memslot_approx() - memslot lookup via ____gfn_to_memslot() with
 * its third argument set to true.
 * NOTE(review): presumably this requests an approximate match for gfns
 * that fall outside every slot - confirm against ____gfn_to_memslot().
 */
1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1971 return ____gfn_to_memslot(slots, gfn, true);
/*
 * kvm_s390_next_dirty_cmma() - find the next page with a set CMMA dirty bit.
 *
 * Starts at cur_gfn in the slot returned by gfn_to_memslot_approx() and
 * scans that slot's second dirty bitmap, moving on through the gfn tree
 * with rb_next() while a slot has no further set bit. Returns
 * ms->base_gfn + ofs for the slot where the search stopped.
 */
1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1975 unsigned long cur_gfn)
1977 struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1978 unsigned long ofs = cur_gfn - ms->base_gfn;
1979 struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
/* cur_gfn lies at or beyond the end of the slot that was found. */
1981 if (ms->base_gfn + ms->npages <= cur_gfn) {
1982 mnode = rb_next(mnode);
1983 /* If we are above the highest slot, wrap around */
1985 mnode = rb_first(&slots->gfn_tree);
1987 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1990 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
/* ofs >= npages means no set bit was found in this slot: try the next. */
1991 while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
1992 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1993 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
1995 return ms->base_gfn + ofs;
/*
 * kvm_s390_get_cmma() - collect CMMA values beginning at the first dirty page.
 *
 * Moves args->start_gfn to the first dirty page at or after the requested
 * gfn, then stores one value per page in res, clearing each page's CMMA
 * dirty bit and decrementing kvm->arch.cmma_dirty_pages when the bit was
 * set. Collection ends when the buffer is full, the next dirty page is
 * too far away, or the end of memory is reached.
 */
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999 u8 *res, unsigned long bufsize)
2001 unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002 struct kvm_memslots *slots = kvm_memslots(kvm);
2003 struct kvm_memory_slot *ms;
2005 if (unlikely(kvm_memslots_empty(slots)))
2008 cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009 ms = gfn_to_memslot(kvm, cur_gfn);
/* Report back where the returned block actually starts. */
2011 args->start_gfn = cur_gfn;
/* next_gfn tracks the dirty page following the one being processed. */
2014 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2015 mem_end = kvm_s390_get_gfn_end(slots);
2017 while (args->count < bufsize) {
2018 hva = gfn_to_hva(kvm, cur_gfn);
2019 if (kvm_is_error_hva(hva))
2021 /* Decrement only if we actually flipped the bit to 0 */
2022 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023 atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2026 /* Save the value */
2027 res[args->count++] = (pgstev >> 24) & 0x43;
2028 /* If the next bit is too far away, stop. */
2029 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2031 /* If we reached the previous "next", find the next one */
2032 if (cur_gfn == next_gfn)
2033 next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034 /* Reached the end of memory or of the buffer, stop */
2035 if ((next_gfn >= mem_end) ||
2036 (next_gfn - args->start_gfn >= bufsize))
2039 /* Reached the end of the current memslot, take the next one. */
2040 if (cur_gfn - ms->base_gfn >= ms->npages) {
2041 ms = gfn_to_memslot(kvm, cur_gfn);
2050 * This function searches for the next page with dirty CMMA attributes, and
2051 * saves the attributes in the buffer up to either the end of the buffer or
2052 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053 * no trailing clean bytes are saved.
2054 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055 * output buffer will indicate 0 as length.
/*
 * kvm_s390_get_cmma_bits() validates the request, allocates a bounce
 * buffer of at most KVM_S390_CMMA_SIZE_MAX bytes, fills it through
 * kvm_s390_peek_cmma() or kvm_s390_get_cmma() while holding the mmap read
 * lock and the KVM SRCU lock, and copies args->count bytes to the user
 * address in args->values.
 */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058 struct kvm_s390_cmma_log *args)
2060 unsigned long bufsize;
2061 int srcu_idx, peek, ret;
2064 if (!kvm->arch.use_cmma)
2066 /* Invalid/unsupported flags were specified */
2067 if (args->flags & ~KVM_S390_CMMA_PEEK)
2069 /* Migration mode query, and we are not doing a migration */
2070 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071 if (!peek && !kvm->arch.migration_mode)
2073 /* CMMA is disabled or was not used, or the buffer has length zero */
2074 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075 if (!bufsize || !kvm->mm->context.uses_cmm) {
/* Zeroing all of args also leaves args->count at 0. */
2076 memset(args, 0, sizeof(*args));
2079 /* We are not peeking, and there are no dirty pages */
2080 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081 memset(args, 0, sizeof(*args));
2085 values = vmalloc(bufsize);
2089 mmap_read_lock(kvm->mm);
2090 srcu_idx = srcu_read_lock(&kvm->srcu);
2092 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2094 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095 srcu_read_unlock(&kvm->srcu, srcu_idx);
2096 mmap_read_unlock(kvm->mm);
/* The dirty-page count is reported only while migration mode is on. */
2098 if (kvm->arch.migration_mode)
2099 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2101 args->remaining = 0;
/* Only the args->count bytes that were collected are copied out. */
2103 if (copy_to_user((void __user *)args->values, values, args->count))
2111 * This function sets the CMMA attributes for the given pages. If the input
2112 * buffer has zero length, no action is taken, otherwise the attributes are
2113 * set and the mm->context.uses_cmm flag is set.
/*
 * kvm_s390_set_cmma_bits() copies args->count bytes from the user address
 * in args->values into a bounce buffer and applies them page by page,
 * starting at args->start_gfn, through set_pgste_bits().
 */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116 const struct kvm_s390_cmma_log *args)
2118 unsigned long hva, mask, pgstev, i;
2120 int srcu_idx, r = 0;
2124 if (!kvm->arch.use_cmma)
2126 /* invalid/unsupported flags */
2127 if (args->flags != 0)
2129 /* Enforce sane limit on memory allocation */
2130 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2133 if (args->count == 0)
/* array_size() guards the size multiplication against overflow. */
2136 bits = vmalloc(array_size(sizeof(*bits), args->count));
2140 r = copy_from_user(bits, (void __user *)args->values, args->count);
2146 mmap_read_lock(kvm->mm);
2147 srcu_idx = srcu_read_lock(&kvm->srcu);
2148 for (i = 0; i < args->count; i++) {
2149 hva = gfn_to_hva(kvm, args->start_gfn + i);
2150 if (kvm_is_error_hva(hva)) {
/* Shift by 24, the inverse of the right shift used by the CMMA readers. */
2156 pgstev = pgstev << 24;
/* Restrict the update to the usage-state and NODAT bits. */
2157 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2160 srcu_read_unlock(&kvm->srcu, srcu_idx);
2161 mmap_read_unlock(kvm->mm);
/* The uses_cmm flag is flipped under the mmap write lock. */
2163 if (!kvm->mm->context.uses_cmm) {
2164 mmap_write_lock(kvm->mm);
2165 kvm->mm->context.uses_cmm = 1;
2166 mmap_write_unlock(kvm->mm);
/*
 * kvm_s390_cpus_from_pv() - take every vCPU out of protected mode.
 *
 * Calls kvm_s390_pv_destroy_cpu() for each vCPU while holding that
 * vCPU's mutex. The "&& !ret" condition singles out the first failure,
 * matching the comment below about returning the first failing rc/rrc.
 */
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2175 struct kvm_vcpu *vcpu;
2181 * We ignore failures and try to destroy as many CPUs as possible.
2182 * At the same time we must not free the assigned resources when
2183 * this fails, as the ultravisor has still access to that memory.
2184 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2186 * We want to return the first failure rc and rrc, though.
2188 kvm_for_each_vcpu(i, vcpu, kvm) {
2189 mutex_lock(&vcpu->mutex);
2190 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2195 mutex_unlock(&vcpu->mutex);
/*
 * kvm_s390_cpus_to_pv() - put every vCPU into protected mode.
 *
 * Calls kvm_s390_pv_create_cpu() for each vCPU while holding that vCPU's
 * mutex. The kvm_s390_cpus_from_pv() call with throw-away rc/rrc is the
 * rollback; NOTE(review): presumably taken only when a create failed -
 * confirm against the full source.
 */
2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2206 struct kvm_vcpu *vcpu;
2208 kvm_for_each_vcpu(i, vcpu, kvm) {
2209 mutex_lock(&vcpu->mutex);
2210 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2211 mutex_unlock(&vcpu->mutex);
2216 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
/*
 * kvm_s390_handle_pv() - execute one protected-virtualization command.
 *
 * The visible cases are enable, disable, set secure parameters, unpack,
 * verify, prepare reset and unshare all. The ultravisor return codes are
 * passed back through cmd->rc and cmd->rrc; cmd->data is used as a user
 * pointer for commands that take a payload.
 */
2220 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 void __user *argp = (void __user *)cmd->data;
2227 case KVM_PV_ENABLE: {
2229 if (kvm_s390_pv_is_protected(kvm))
2233 * FMT 4 SIE needs esca. As we never switch back to bsca from
2234 * esca, we need no cleanup in the error cases below
2236 r = sca_switch_to_extended(kvm);
/* gmap_mark_unmergeable() is called with the mmap write lock held. */
2240 mmap_write_lock(current->mm);
2241 r = gmap_mark_unmergeable();
2242 mmap_write_unlock(current->mm);
2246 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
/* Undo the VM init; its rc/rrc are discarded so cmd keeps the CPU error. */
2252 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2254 /* we need to block service interrupts from now on */
2255 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2258 case KVM_PV_DISABLE: {
2260 if (!kvm_s390_pv_is_protected(kvm))
2263 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2265 * If a CPU could not be destroyed, destroy VM will also fail.
2266 * There is no point in trying to destroy it. Instead return
2267 * the rc and rrc from the first CPU that failed destroying.
2271 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2273 /* no need to block service interrupts any more */
2274 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2277 case KVM_PV_SET_SEC_PARMS: {
2278 struct kvm_s390_pv_sec_parm parms = {};
2282 if (!kvm_s390_pv_is_protected(kvm))
2286 if (copy_from_user(&parms, argp, sizeof(parms)))
2289 /* Currently restricted to 8KB */
2291 if (parms.length > PAGE_SIZE * 2)
/* Bounce buffer for the header located at the user address parms.origin. */
2295 hdr = vmalloc(parms.length);
2300 if (!copy_from_user(hdr, (void __user *)parms.origin,
2302 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2303 &cmd->rc, &cmd->rrc);
2308 case KVM_PV_UNPACK: {
2309 struct kvm_s390_pv_unp unp = {};
/* Requires both a protected VM and a protected mm. */
2312 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2316 if (copy_from_user(&unp, argp, sizeof(unp)))
2319 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2320 &cmd->rc, &cmd->rrc);
2323 case KVM_PV_VERIFY: {
2325 if (!kvm_s390_pv_is_protected(kvm))
2328 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2329 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2330 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2334 case KVM_PV_PREP_RESET: {
2336 if (!kvm_s390_pv_is_protected(kvm))
2339 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2340 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2341 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2345 case KVM_PV_UNSHARE_ALL: {
2347 if (!kvm_s390_pv_is_protected(kvm))
2350 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2352 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
/*
 * kvm_arch_vm_ioctl() - s390-specific VM ioctl handler.
 *
 * The VM comes from filp->private_data and arg is treated as a user
 * pointer. Each case copies its argument structure from user space,
 * calls the matching helper and, for the CMMA-get and PV commands,
 * copies the updated structure back.
 */
2362 long kvm_arch_vm_ioctl(struct file *filp,
2363 unsigned int ioctl, unsigned long arg)
2365 struct kvm *kvm = filp->private_data;
2366 void __user *argp = (void __user *)arg;
2367 struct kvm_device_attr attr;
2371 case KVM_S390_INTERRUPT: {
2372 struct kvm_s390_interrupt s390int;
2375 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2377 r = kvm_s390_inject_vm(kvm, &s390int);
2380 case KVM_CREATE_IRQCHIP: {
2381 struct kvm_irq_routing_entry routing;
2384 if (kvm->arch.use_irqchip) {
2385 /* Set up dummy routing. */
2386 memset(&routing, 0, sizeof(routing));
2387 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2391 case KVM_SET_DEVICE_ATTR: {
2393 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2395 r = kvm_s390_vm_set_attr(kvm, &attr);
2398 case KVM_GET_DEVICE_ATTR: {
2400 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2402 r = kvm_s390_vm_get_attr(kvm, &attr);
2405 case KVM_HAS_DEVICE_ATTR: {
2407 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2409 r = kvm_s390_vm_has_attr(kvm, &attr);
2412 case KVM_S390_GET_SKEYS: {
2413 struct kvm_s390_skeys args;
2416 if (copy_from_user(&args, argp,
2417 sizeof(struct kvm_s390_skeys)))
2419 r = kvm_s390_get_skeys(kvm, &args);
2422 case KVM_S390_SET_SKEYS: {
2423 struct kvm_s390_skeys args;
2426 if (copy_from_user(&args, argp,
2427 sizeof(struct kvm_s390_skeys)))
2429 r = kvm_s390_set_skeys(kvm, &args);
2432 case KVM_S390_GET_CMMA_BITS: {
2433 struct kvm_s390_cmma_log args;
2436 if (copy_from_user(&args, argp, sizeof(args)))
/* Both CMMA ioctls run their helper with kvm->slots_lock held. */
2438 mutex_lock(&kvm->slots_lock);
2439 r = kvm_s390_get_cmma_bits(kvm, &args);
2440 mutex_unlock(&kvm->slots_lock);
/* args is written back so user space sees the fields the helper updated. */
2442 r = copy_to_user(argp, &args, sizeof(args));
2448 case KVM_S390_SET_CMMA_BITS: {
2449 struct kvm_s390_cmma_log args;
2452 if (copy_from_user(&args, argp, sizeof(args)))
2454 mutex_lock(&kvm->slots_lock);
2455 r = kvm_s390_set_cmma_bits(kvm, &args);
2456 mutex_unlock(&kvm->slots_lock);
2459 case KVM_S390_PV_COMMAND: {
2460 struct kvm_pv_cmd args;
2462 /* protvirt means user cpu state */
2463 kvm_s390_set_user_cpu_state_ctrl(kvm);
2465 if (!is_prot_virt_host()) {
2469 if (copy_from_user(&args, argp, sizeof(args))) {
/* PV commands are executed with kvm->lock held. */
2477 mutex_lock(&kvm->lock);
2478 r = kvm_s390_handle_pv(kvm, &args);
2479 mutex_unlock(&kvm->lock);
/* Write args back so user space can read the rc/rrc set by the handler. */
2480 if (copy_to_user(argp, &args, sizeof(args))) {
/*
 * Query the AP configuration information (ap_qci) when AP instructions are
 * available. kvm_s390_set_crycb_format() treats a non-zero result as
 * "APXA installed".
 * NOTE(review): the statements producing the return value are not visible
 * in this excerpt - confirm against the full function.
 */
2493 static int kvm_s390_apxa_installed(void)
2495 struct ap_config_info info;
2497 if (ap_instructions_available()) {
2498 if (ap_qci(&info) == 0)
2506 * The format of the crypto control block (CRYCB) is specified in the 3 low
2507 * order bits of the CRYCB designation (CRYCBD) field as follows:
2508 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2509 * AP extended addressing (APXA) facility are installed.
2510 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2511 * Format 2: Both the APXA and MSAX3 facilities are installed
2513 static void kvm_s390_set_crycb_format(struct kvm *kvm)
/* The designation starts out as the CRYCB address truncated to 32 bits. */
2515 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2517 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2518 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2520 /* Check whether MSAX3 is installed */
2521 if (!test_kvm_facility(kvm, 76))
/* Facility 76 is the MSAX3 check; APXA then selects format 2. */
2524 if (kvm_s390_apxa_installed())
2525 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2527 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2531 * kvm_arch_crypto_set_masks
2533 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2535 * @apm: the mask identifying the accessible AP adapters
2536 * @aqm: the mask identifying the accessible AP domains
2537 * @adm: the mask identifying the accessible AP control domains
2539 * Set the masks that identify the adapters, domains and control domains to
2540 * which the KVM guest is granted access.
2542 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2545 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2546 unsigned long *aqm, unsigned long *adm)
2548 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
/* All vCPUs stay blocked until the masks have been rewritten (see 2579). */
2550 kvm_s390_vcpu_block_all(kvm);
/* The mask layout to fill depends on the CRYCB format bits in crycbd. */
2552 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2553 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
/* 32 bytes are copied per mask, i.e. the 256 bits noted above. */
2554 memcpy(crycb->apcb1.apm, apm, 32);
2555 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2556 apm[0], apm[1], apm[2], apm[3]);
2557 memcpy(crycb->apcb1.aqm, aqm, 32);
2558 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2559 aqm[0], aqm[1], aqm[2], aqm[3]);
2560 memcpy(crycb->apcb1.adm, adm, 32);
2561 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2562 adm[0], adm[1], adm[2], adm[3]);
2565 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
/* Only the first 8 bytes of apm and 2 bytes of aqm/adm go into APCB0. */
2566 memcpy(crycb->apcb0.apm, apm, 8);
2567 memcpy(crycb->apcb0.aqm, aqm, 2);
2568 memcpy(crycb->apcb0.adm, adm, 2);
2569 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2570 apm[0], *((unsigned short *)aqm),
2571 *((unsigned short *)adm));
2573 default: /* Can not happen */
2577 /* recreate the shadow crycb for each vcpu */
2578 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2579 kvm_s390_vcpu_unblock_all(kvm);
2581 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2584 * kvm_arch_crypto_clear_masks
2586 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2589 * Clear the masks that identify the adapters, domains and control domains to
2590 * which the KVM guest is granted access.
2592 * Note: The kvm->lock mutex must be locked by the caller before invoking this
2595 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
/* vCPUs are blocked for the duration of the update (unblocked at 2607). */
2597 kvm_s390_vcpu_block_all(kvm);
/* Both APCB layouts are zeroed, independent of the active CRYCB format. */
2599 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2600 sizeof(kvm->arch.crypto.crycb->apcb0));
2601 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2602 sizeof(kvm->arch.crypto.crycb->apcb1));
2604 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2605 /* recreate the shadow crycb for each vcpu */
2606 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2607 kvm_s390_vcpu_unblock_all(kvm);
2609 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
/*
 * Produce the initial CPU id of a VM as a u64: the version field of the
 * local cpuid structure is forced to 0xff and the structure is returned
 * reinterpreted as a 64-bit value.
 * NOTE(review): how cpuid is declared and filled beforehand is on lines
 * not visible in this excerpt.
 */
2611 static u64 kvm_s390_get_initial_cpuid(void)
2616 cpuid.version = 0xff;
2617 return *((u64 *) &cpuid);
/*
 * Initialise the VM-wide crypto state: the CRYCB is the one embedded in
 * sie_page2, the CRYCB designation/format is derived from it, and the PQAP
 * hook rwsem is initialised. After the facility 76 check, AES and DEA key
 * wrapping are switched on and both wrapping key masks are filled with
 * random bytes.
 */
2620 static void kvm_s390_crypto_init(struct kvm *kvm)
2622 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2623 kvm_s390_set_crycb_format(kvm);
2624 init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2626 if (!test_kvm_facility(kvm, 76))
2629 /* Enable AES/DEA protected key functions by default */
2630 kvm->arch.crypto.aes_kw = 1;
2631 kvm->arch.crypto.dea_kw = 1;
2632 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2633 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2634 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2635 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
/*
 * Release the VM's SCA and clear the pointer. An extended SCA (use_esca)
 * is freed with its exact size; the free_page() call handles the
 * single-page case.
 */
2638 static void sca_dispose(struct kvm *kvm)
2640 if (kvm->arch.use_esca)
2641 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2643 free_page((unsigned long)(kvm->arch.sca));
2644 kvm->arch.sca = NULL;
/*
 * Architecture-specific part of VM creation. Visible steps, in order:
 * check the VM type (user-controlled VMs are checked against
 * CAP_SYS_ADMIN), enable SIE, allocate the basic SCA, register the s390
 * debug area, allocate sie_page2, derive the facility mask/list and CPU
 * model, initialise crypto and floating-interrupt state, set up the guest
 * address space (gmap) and memory limit, and initialise vSIE and GISA.
 * The lines at 2765-2768 release sie_page2 and the debug area and log the
 * failure code.
 */
2647 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2649 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2651 char debug_name[16];
2652 static unsigned long sca_offset;
2655 #ifdef CONFIG_KVM_S390_UCONTROL
2656 if (type & ~KVM_VM_S390_UCONTROL)
2658 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2665 rc = s390_enable_sie();
/* Without 64-bit SCA origin support the SCA page comes from GFP_DMA. */
2671 if (!sclp.has_64bscao)
2672 alloc_flags |= GFP_DMA;
2673 rwlock_init(&kvm->arch.sca_lock);
2674 /* start with basic SCA */
2675 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
/*
 * sca_offset is static (shared by all VMs) and only used under kvm_lock;
 * it places the basic SCA at an offset inside its page.
 */
2678 mutex_lock(&kvm_lock);
2680 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2682 kvm->arch.sca = (struct bsca_block *)
2683 ((char *) kvm->arch.sca + sca_offset);
2684 mutex_unlock(&kvm_lock);
/* Bounded formatting: the write can never run past debug_name. */
2686 snprintf(debug_name, sizeof(debug_name), "kvm-%u", current->pid);
2688 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2692 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2693 kvm->arch.sie_page2 =
2694 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2695 if (!kvm->arch.sie_page2)
2698 kvm->arch.sie_page2->kvm = kvm;
/* The facility list handed to the guest lives inside sie_page2. */
2699 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
/*
 * fac_mask: host facilities allowed by the base or extended KVM sets.
 * fac_list: host facilities allowed by the base set only.
 */
2701 for (i = 0; i < kvm_s390_fac_size(); i++) {
2702 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2703 (kvm_s390_fac_base[i] |
2704 kvm_s390_fac_ext[i]);
2705 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2706 kvm_s390_fac_base[i];
2708 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2710 /* we are always in czam mode - even on pre z14 machines */
2711 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2712 set_kvm_facility(kvm->arch.model.fac_list, 138);
2713 /* we emulate STHYI in kvm */
2714 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2715 set_kvm_facility(kvm->arch.model.fac_list, 74);
2716 if (MACHINE_HAS_TLB_GUEST) {
2717 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2718 set_kvm_facility(kvm->arch.model.fac_list, 147);
/* Facility 65 is only added to the mask, not to the default list. */
2721 if (css_general_characteristics.aiv && test_facility(65))
2722 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2724 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2725 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2727 kvm_s390_crypto_init(kvm);
2729 mutex_init(&kvm->arch.float_int.ais_lock);
2730 spin_lock_init(&kvm->arch.float_int.lock);
2731 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2732 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2733 init_waitqueue_head(&kvm->arch.ipte_wq);
2734 mutex_init(&kvm->arch.ipte_mutex);
2736 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2737 VM_EVENT(kvm, 3, "vm created with type %lu", type);
/*
 * A user-controlled VM gets no VM-wide gmap and no memory limit. The
 * gmap_create() path derives the limit from sclp.hamax and TASK_SIZE_MAX.
 */
2739 if (type & KVM_VM_S390_UCONTROL) {
2740 kvm->arch.gmap = NULL;
2741 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2743 if (sclp.hamax == U64_MAX)
2744 kvm->arch.mem_limit = TASK_SIZE_MAX;
2746 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2748 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2749 if (!kvm->arch.gmap)
2751 kvm->arch.gmap->private = kvm;
2752 kvm->arch.gmap->pfault_enabled = 0;
2755 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2756 kvm->arch.use_skf = sclp.has_skey;
2757 spin_lock_init(&kvm->arch.start_stop_lock);
2758 kvm_s390_vsie_init(kvm);
2760 kvm_s390_gisa_init(kvm);
2761 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
/* Failure handling: release sie_page2 and the debug area, log rc. */
2765 free_page((unsigned long)kvm->arch.sie_page2);
2766 debug_unregister(kvm->arch.dbf);
2768 KVM_EVENT(3, "creation of vm failed: %d", rc);
/*
 * Tear down the architecture state of a vCPU: drop pending local
 * interrupts and async page-fault completions, release the per-vCPU gmap
 * of a user-controlled VM, undo the CMMA setup, destroy the
 * protected-virtualization CPU if it has a handle, and free the page that
 * holds the SIE block.
 */
2772 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2776 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2777 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2778 kvm_s390_clear_local_irqs(vcpu);
2779 kvm_clear_async_pf_completion_queue(vcpu);
2780 if (!kvm_is_ucontrol(vcpu->kvm))
/* Only user-controlled VMs own a gmap per vCPU. */
2783 if (kvm_is_ucontrol(vcpu->kvm))
2784 gmap_remove(vcpu->arch.gmap);
2786 if (vcpu->kvm->arch.use_cmma)
2787 kvm_s390_vcpu_unsetup_cmma(vcpu);
2788 /* We can not hold the vcpu mutex here, we are already dying */
2789 if (kvm_s390_pv_cpu_get_handle(vcpu))
2790 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2791 free_page((unsigned long)(vcpu->arch.sie_block));
/*
 * Tear down the architecture state of a VM: destroy all vCPUs and the
 * GISA, deinitialise the protected-virtualization VM if it has a handle,
 * release the debug area and sie_page2, remove the VM-wide gmap (not
 * present for user-controlled VMs), and destroy adapters, floating
 * interrupts and vSIE state.
 */
2794 void kvm_arch_destroy_vm(struct kvm *kvm)
2798 kvm_destroy_vcpus(kvm);
2800 kvm_s390_gisa_destroy(kvm);
2802 * We are already at the end of life and kvm->lock is not taken.
2803 * This is ok as the file descriptor is closed by now and nobody
2804 * can mess with the pv state. To avoid lockdep_assert_held from
2805 * complaining we do not use kvm_s390_pv_is_protected.
2807 if (kvm_s390_pv_get_handle(kvm))
2808 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2809 debug_unregister(kvm->arch.dbf);
2810 free_page((unsigned long)kvm->arch.sie_page2);
2811 if (!kvm_is_ucontrol(kvm))
2812 gmap_remove(kvm->arch.gmap);
2813 kvm_s390_destroy_adapters(kvm);
2814 kvm_s390_clear_float_irqs(kvm);
2815 kvm_s390_vsie_destroy(kvm);
2816 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2819 /* Section: vcpu related */
/*
 * Create the private gmap of a vCPU that belongs to a user-controlled VM.
 * The gmap is created for current->mm with a limit of -1UL and points back
 * to the VM through ->private.
 */
2820 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2822 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2823 if (!vcpu->arch.gmap)
2825 vcpu->arch.gmap->private = vcpu->kvm;
/*
 * Remove a vCPU from the SCA: its bit in mcn is cleared and its sda entry
 * zeroed, using the extended or the basic SCA layout depending on
 * use_esca. The update runs under the read side of sca_lock and is guarded
 * by kvm_s390_use_sca_entries().
 */
2830 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2832 if (!kvm_s390_use_sca_entries())
2834 read_lock(&vcpu->kvm->arch.sca_lock);
2835 if (vcpu->kvm->arch.use_esca) {
2836 struct esca_block *sca = vcpu->kvm->arch.sca;
2838 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2839 sca->cpu[vcpu->vcpu_id].sda = 0;
2841 struct bsca_block *sca = vcpu->kvm->arch.sca;
2843 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2844 sca->cpu[vcpu->vcpu_id].sda = 0;
2846 read_unlock(&vcpu->kvm->arch.sca_lock);
/*
 * Publish a vCPU in the SCA and store the SCA origin, split into its high
 * and low 32 bits (scaoh/scaol), in the vCPU's SIE block. When SCA entries
 * are not used, only the origin of the basic SCA is set. Otherwise the
 * vCPU's sda entry and mcn bit are set under the read side of sca_lock,
 * in the extended or basic layout depending on use_esca.
 */
2849 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2851 if (!kvm_s390_use_sca_entries()) {
2852 struct bsca_block *sca = vcpu->kvm->arch.sca;
2854 /* we still need the basic sca for the ipte control */
2855 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2859 read_lock(&vcpu->kvm->arch.sca_lock);
2860 if (vcpu->kvm->arch.use_esca) {
2861 struct esca_block *sca = vcpu->kvm->arch.sca;
2863 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2864 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
/* For the extended SCA the low 6 bits of scaol are masked off. */
2865 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2866 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2867 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2869 struct bsca_block *sca = vcpu->kvm->arch.sca;
2871 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2872 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2873 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2874 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2876 read_unlock(&vcpu->kvm->arch.sca_lock);
2879 /* Basic SCA to Extended SCA data copy routines */
/*
 * Copy one CPU entry from the basic SCA (s) to the extended SCA (d). The
 * visible statements transfer the sigp_ctrl c and scn fields.
 */
2880 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2883 d->sigp_ctrl.c = s->sigp_ctrl.c;
2884 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
/*
 * Copy the contents of a basic SCA (s) into an extended SCA (d): the
 * ipte_control word and the first KVM_S390_BSCA_CPU_SLOTS CPU entries.
 */
2887 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2891 d->ipte_control = s->ipte_control;
2893 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2894 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
/*
 * Replace the VM's basic SCA by a freshly allocated, zeroed extended SCA.
 * With all vCPUs blocked and sca_lock write-held, the old contents are
 * copied over, every vCPU's SIE block is pointed at the new SCA (scaoh,
 * scaol, ECB2_ESCA) and kvm->arch.sca / use_esca are updated. The old SCA
 * page is freed after the lock is dropped and the vCPUs are unblocked.
 */
2897 static int sca_switch_to_extended(struct kvm *kvm)
2899 struct bsca_block *old_sca = kvm->arch.sca;
2900 struct esca_block *new_sca;
2901 struct kvm_vcpu *vcpu;
2902 unsigned long vcpu_idx;
2905 if (kvm->arch.use_esca)
2908 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
/* Origin split into high/low 32 bits; the low 6 bits of scaol are masked. */
2912 scaoh = (u32)((u64)(new_sca) >> 32);
2913 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2915 kvm_s390_vcpu_block_all(kvm);
2916 write_lock(&kvm->arch.sca_lock);
2918 sca_copy_b_to_e(new_sca, old_sca);
2920 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2921 vcpu->arch.sie_block->scaoh = scaoh;
2922 vcpu->arch.sie_block->scaol = scaol;
2923 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2925 kvm->arch.sca = new_sca;
2926 kvm->arch.use_esca = 1;
2928 write_unlock(&kvm->arch.sca_lock);
2929 kvm_s390_vcpu_unblock_all(kvm);
2931 free_page((unsigned long)old_sca);
2933 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2934 old_sca, kvm->arch.sca);
/*
 * Decide whether a vCPU with the given id fits into the VM's SCA. The id
 * is compared against KVM_MAX_VCPUS when SCA entries are not used, and
 * against KVM_S390_BSCA_CPU_SLOTS otherwise. Past those checks the
 * machine's ESCA and 64-bit SCA origin support is tested, the SCA is
 * switched to the extended format under kvm->lock if that has not happened
 * yet, and the id must be below KVM_S390_ESCA_CPU_SLOTS.
 */
2938 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2942 if (!kvm_s390_use_sca_entries()) {
2943 if (id < KVM_MAX_VCPUS)
2947 if (id < KVM_S390_BSCA_CPU_SLOTS)
2949 if (!sclp.has_esca || !sclp.has_64bscao)
2952 mutex_lock(&kvm->lock);
2953 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2954 mutex_unlock(&kvm->lock);
2956 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Open a CPU-timer accounting interval: cputm_start is set to the current
 * TOD clock inside a cputm_seqcount write section. Warns once if an
 * interval is already open (cputm_start != 0).
 */
2960 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2962 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2963 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2964 vcpu->arch.cputm_start = get_tod_clock_fast();
2965 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Close the current CPU-timer accounting interval: the TOD time elapsed
 * since cputm_start is subtracted from the SIE block's cputm and
 * cputm_start is reset to 0, all inside a cputm_seqcount write section.
 * Warns once if no interval is open.
 */
2969 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2971 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2972 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2973 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2974 vcpu->arch.cputm_start = 0;
2975 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Mark CPU-timer accounting as enabled and open an accounting interval.
 * Warns once if accounting is already enabled.
 */
2979 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2981 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2982 vcpu->arch.cputm_enabled = true;
2983 __start_cpu_timer_accounting(vcpu);
2986 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
/*
 * Close the open accounting interval and mark CPU-timer accounting as
 * disabled. Warns once if accounting is not enabled.
 */
2987 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2989 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2990 __stop_cpu_timer_accounting(vcpu);
2991 vcpu->arch.cputm_enabled = false;
/*
 * Wrapper around __enable_cpu_timer_accounting() that disables preemption
 * before making the call.
 */
2994 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2996 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2997 __enable_cpu_timer_accounting(vcpu);
/*
 * Wrapper around __disable_cpu_timer_accounting() that disables preemption
 * before making the call.
 */
3001 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3003 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3004 __disable_cpu_timer_accounting(vcpu);
3008 /* set the cpu timer - may only be called from the VCPU thread itself */
/*
 * Store a new CPU timer value (cputm) in the SIE block. The store happens
 * inside a cputm_seqcount write section with preemption disabled; when
 * accounting is enabled, the interval start is reset to the current TOD
 * clock in the same section.
 */
3009 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3011 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3012 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3013 if (vcpu->arch.cputm_enabled)
3014 vcpu->arch.cputm_start = get_tod_clock_fast();
3015 vcpu->arch.sie_block->cputm = cputm;
3016 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3020 /* update and get the cpu timer - can also be called from other VCPU threads */
/*
 * Read the current CPU timer of a vCPU. With accounting disabled the SIE
 * block value is returned as is. Otherwise the value is sampled in a
 * cputm_seqcount read loop and, while an accounting interval is open
 * (cputm_start != 0), reduced by the TOD time elapsed since cputm_start.
 */
3021 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3026 if (unlikely(!vcpu->arch.cputm_enabled))
3027 return vcpu->arch.sie_block->cputm;
3029 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3031 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3033 * If the writer would ever execute a read in the critical
3034 * section, e.g. in irq context, we have a deadlock.
3036 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3037 value = vcpu->arch.sie_block->cputm;
3038 /* if cputm_start is 0, accounting is being started/stopped */
3039 if (likely(vcpu->arch.cputm_start))
3040 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
/*
 * The retry check is made against seq with its low bit cleared, so a
 * sample taken at an odd sequence value is always retried.
 */
3041 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
/*
 * Architecture hook for loading a vCPU: the gmap remembered in
 * enabled_gmap is enabled, CPUSTAT_RUNNING is set, and a CPU-timer
 * accounting interval is opened when accounting is enabled and the vCPU is
 * not idle.
 */
3046 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3049 gmap_enable(vcpu->arch.enabled_gmap);
3050 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3051 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3052 __start_cpu_timer_accounting(vcpu);
/*
 * Architecture hook for putting a vCPU, mirroring kvm_arch_vcpu_load():
 * the open CPU-timer accounting interval is closed (accounting enabled,
 * vCPU not idle), CPUSTAT_RUNNING is cleared, and the currently enabled
 * gmap is remembered in enabled_gmap and then disabled.
 */
3056 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3059 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3060 __stop_cpu_timer_accounting(vcpu);
3061 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3062 vcpu->arch.enabled_gmap = gmap_get_enabled();
3063 gmap_disable(vcpu->arch.enabled_gmap);
/*
 * Post-creation setup of a vCPU: the VM's epoch and epdx are copied into
 * the SIE block under kvm->lock, a vCPU of a non-ucontrol VM adopts the
 * VM-wide gmap, ICTL_OPEREXC is set when facility 74 or user_instr0 is
 * active, and enabled_gmap is seeded with the vCPU's gmap.
 */
3067 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3069 mutex_lock(&vcpu->kvm->lock);
3071 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3072 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3074 mutex_unlock(&vcpu->kvm->lock);
3075 if (!kvm_is_ucontrol(vcpu->kvm)) {
3076 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3079 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3080 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3081 /* make vcpu_load load the right gmap on the first trigger */
3082 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
/*
 * Test PCKMO subfunction bit nr in both the VM's CPU model
 * (kvm->arch.model.subfuncs) and the host-wide kvm_s390_available_subfunc;
 * the condition holds only when the bit is set in both.
 */
3085 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3087 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3088 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
/*
 * Report whether any of the PCKMO subfunctions 32, 33, 34, 40 or 41 is
 * available to the VM (see kvm_has_pckmo_subfunc()).
 */
3093 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3095 /* At least one ECC subfunction must be present */
3096 return kvm_has_pckmo_subfunc(kvm, 32) ||
3097 kvm_has_pckmo_subfunc(kvm, 33) ||
3098 kvm_has_pckmo_subfunc(kvm, 34) ||
3099 kvm_has_pckmo_subfunc(kvm, 40) ||
3100 kvm_has_pckmo_subfunc(kvm, 41);
/*
 * Program the crypto-related fields of a vCPU's SIE block from the VM-wide
 * crypto configuration: crycbd is copied, the ECB3_AES/ECB3_DEA, ECA_APIE
 * and ECD_ECC bits are cleared and then set again according to apie,
 * aes_kw, dea_kw and the availability of a PCKMO ECC subfunction.
 */
3104 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3107 * If the AP instructions are not being interpreted and the MSAX3
3108 * facility is not configured for the guest, there is nothing to set up.
3110 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3113 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
/* Start from a clean slate so stale bits cannot survive a reconfiguration. */
3114 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3115 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3116 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3118 if (vcpu->kvm->arch.crypto.apie)
3119 vcpu->arch.sie_block->eca |= ECA_APIE;
3121 /* Set up protected key support */
3122 if (vcpu->kvm->arch.crypto.aes_kw) {
3123 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3124 /* ecc is also wrapped with AES key */
3125 if (kvm_has_pckmo_ecc(vcpu->kvm))
3126 vcpu->arch.sie_block->ecd |= ECD_ECC;
3129 if (vcpu->kvm->arch.crypto.dea_kw)
3130 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
/*
 * Free the page whose address is stored in the SIE block's cbrlo field and
 * reset the field to 0.
 */
3133 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3135 free_page(vcpu->arch.sie_block->cbrlo);
3136 vcpu->arch.sie_block->cbrlo = 0;
/*
 * Allocate a zeroed page and store its address in the SIE block's cbrlo
 * field; a failed allocation is detected by the check on cbrlo.
 */
3139 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3141 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3142 if (!vcpu->arch.sie_block->cbrlo)
/*
 * Apply the VM's CPU model to a vCPU's SIE block: ibc is copied and, when
 * facility 7 is set for the VM, fac receives the address of the model's
 * facility list truncated to 32 bits.
 */
3147 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3149 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3151 vcpu->arch.sie_block->ibc = model->ibc;
3152 if (test_kvm_facility(vcpu->kvm, 7))
3153 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
/*
 * One-time setup of a new vCPU's SIE block: initial CPU flags, CPU model,
 * the ECB/ECB2/ECA/ECD feature bits selected by VM facilities and machine
 * capabilities, the addresses of the sdnx and riccb sync-register areas,
 * CMMA, the ckc_timer hrtimer and the crypto fields. For a protected VM
 * the protected-virtualization CPU is then created under kvm->lock.
 */
3156 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3161 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
/* Facility 78 selects GED2; otherwise facility 8 selects GED. */
3165 if (test_kvm_facility(vcpu->kvm, 78))
3166 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3167 else if (test_kvm_facility(vcpu->kvm, 8))
3168 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3170 kvm_s390_vcpu_setup_model(vcpu);
3172 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3173 if (MACHINE_HAS_ESOP)
3174 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3175 if (test_kvm_facility(vcpu->kvm, 9))
3176 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3177 if (test_kvm_facility(vcpu->kvm, 73))
3178 vcpu->arch.sie_block->ecb |= ECB_TE;
3179 if (!kvm_is_ucontrol(vcpu->kvm))
3180 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3182 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3183 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3184 if (test_kvm_facility(vcpu->kvm, 130))
3185 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
/* eca is assigned here; every later update only ORs further bits in. */
3186 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3188 vcpu->arch.sie_block->eca |= ECA_CEI;
3190 vcpu->arch.sie_block->eca |= ECA_IB;
3192 vcpu->arch.sie_block->eca |= ECA_SII;
3193 if (sclp.has_sigpif)
3194 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3195 if (test_kvm_facility(vcpu->kvm, 129)) {
3196 vcpu->arch.sie_block->eca |= ECA_VX;
3197 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3199 if (test_kvm_facility(vcpu->kvm, 139))
3200 vcpu->arch.sie_block->ecd |= ECD_MEF;
3201 if (test_kvm_facility(vcpu->kvm, 156))
3202 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
/* A non-zero GISA designation (gd) turns on ECA_AIV. */
3203 if (vcpu->arch.sie_block->gd) {
3204 vcpu->arch.sie_block->eca |= ECA_AIV;
3205 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3206 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
/* sdnxo and riccbd point into the sync-register area of vcpu->run. */
3208 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3210 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3213 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3215 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3217 if (vcpu->kvm->arch.use_cmma) {
3218 rc = kvm_s390_vcpu_setup_cmma(vcpu);
/* ckc_timer: relative CLOCK_MONOTONIC hrtimer firing kvm_s390_idle_wakeup. */
3222 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3223 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3225 vcpu->arch.sie_block->hpid = HPID_KVM;
3227 kvm_s390_vcpu_crypto_setup(vcpu);
/* The protected state is checked and the PV CPU created under kvm->lock. */
3229 mutex_lock(&vcpu->kvm->lock);
3230 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3231 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3233 kvm_s390_vcpu_unsetup_cmma(vcpu);
3235 mutex_unlock(&vcpu->kvm->lock);
/*
 * Pre-creation check for a vCPU id: for a VM that is not user-controlled,
 * sca_can_add_vcpu() must accept the id.
 */
3240 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3242 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
/*
 * Architecture-specific vCPU creation: allocate the zeroed SIE page and
 * initialise the basic SIE block fields (itdba, mso/msl, icpua, gd),
 * announce the valid sync registers depending on VM facilities, create the
 * private gmap of a user-controlled VM, and run kvm_s390_vcpu_setup().
 * The error labels at the end remove the ucontrol gmap and free the SIE
 * block page.
 */
3247 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3249 struct sie_page *sie_page;
3252 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3253 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
/* The SIE block and the itdb area both live inside the SIE page. */
3257 vcpu->arch.sie_block = &sie_page->sie_block;
3258 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3260 /* the real guest size will always be smaller than msl */
3261 vcpu->arch.sie_block->mso = 0;
3262 vcpu->arch.sie_block->msl = sclp.hamax;
3264 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3265 spin_lock_init(&vcpu->arch.local_int.lock);
/* gd: 32-bit GISA origin, tagged GISA_FORMAT1 when sclp.has_gisaf is set. */
3266 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3267 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3268 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3269 seqcount_init(&vcpu->arch.cputm_seqcount);
3271 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3272 kvm_clear_async_pf_completion_queue(vcpu);
3273 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3280 kvm_s390_set_prefix(vcpu, 0);
/* Optional sync registers are announced per VM facility. */
3281 if (test_kvm_facility(vcpu->kvm, 64))
3282 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3283 if (test_kvm_facility(vcpu->kvm, 82))
3284 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3285 if (test_kvm_facility(vcpu->kvm, 133))
3286 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3287 if (test_kvm_facility(vcpu->kvm, 156))
3288 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3289 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3290 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3293 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3295 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3297 if (kvm_is_ucontrol(vcpu->kvm)) {
3298 rc = __kvm_ucontrol_vcpu_init(vcpu);
3300 goto out_free_sie_block;
3303 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3304 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3305 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3307 rc = kvm_s390_vcpu_setup(vcpu);
3309 goto out_ucontrol_uninit;
/* Unwind: drop the per-vCPU gmap of a ucontrol VM, then the SIE page. */
3312 out_ucontrol_uninit:
3313 if (kvm_is_ucontrol(vcpu->kvm))
3314 gmap_remove(vcpu->arch.gmap);
3316 free_page((unsigned long)(vcpu->arch.sie_block));
/*
 * Clear this vCPU's bit (indexed by vcpu_idx) in the GISA kicked_mask and
 * return the result of kvm_s390_vcpu_has_irq(vcpu, 0).
 */
3320 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3322 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3323 return kvm_s390_vcpu_has_irq(vcpu, 0);
/*
 * A vCPU is reported as being in kernel mode when PSW_MASK_PSTATE is clear
 * in the guest PSW mask.
 */
3326 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3328 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
/* Atomically set PROG_BLOCK_SIE in the SIE block's prog20 word. */
3331 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3333 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Atomically clear PROG_BLOCK_SIE in the SIE block's prog20 word. */
3337 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3339 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
/* Atomically set PROG_REQUEST in the SIE block's prog20 word. */
3342 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3344 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
/*
 * True when PROG_BLOCK_SIE or PROG_REQUEST is currently set in the SIE
 * block's prog20 word.
 */
3348 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3350 return atomic_read(&vcpu->arch.sie_block->prog20) &
3351 (PROG_BLOCK_SIE | PROG_REQUEST);
/* Atomically clear PROG_REQUEST in the SIE block's prog20 word. */
3354 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3356 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3360 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3361 * If the CPU is not running (e.g. waiting as idle) the function will
3362 * return immediately. */
3363 void exit_sie(struct kvm_vcpu *vcpu)
/* Flag a stop interrupt and kick the vCPU out of vSIE as well. */
3365 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3366 kvm_s390_vsie_kick(vcpu);
/* Loop for as long as PROG_IN_SIE is still set in prog0c. */
3367 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3371 /* Kick a guest cpu out of SIE to process a request synchronously */
/*
 * Post request req on the vCPU and set PROG_REQUEST in its SIE block via
 * kvm_s390_vcpu_request().
 */
3372 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3374 kvm_make_request(req, vcpu);
3375 kvm_s390_vcpu_request(vcpu);
/*
 * gmap notifier callback. The gmap's ->private is the owning VM. After the
 * checks for shadow gmaps and for ranges starting at or above 2 GiB, every
 * vCPU whose two prefix pages overlap the notified range [start, end] gets
 * a synchronous KVM_REQ_MMU_RELOAD request.
 */
3378 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3381 struct kvm *kvm = gmap->private;
3382 struct kvm_vcpu *vcpu;
3383 unsigned long prefix;
3386 if (gmap_is_shadow(gmap))
3388 if (start >= 1UL << 31)
3389 /* We are only interested in prefix pages */
3391 kvm_for_each_vcpu(i, vcpu, kvm) {
3392 /* match against both prefix pages */
3393 prefix = kvm_s390_get_prefix(vcpu);
/* Overlap test of [start, end] with [prefix, prefix + 2 pages - 1]. */
3394 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3395 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3397 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
/*
 * Halt-polling veto. The lowcore's avg_steal_timer is scaled to a
 * percentage (times 100, divided by TICK_USEC << 12) and compared with
 * halt_poll_max_steal. When the limit is reached, the halt_no_poll_steal
 * statistic is incremented.
 */
3402 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3404 /* do not poll with more than halt_poll_max_steal percent of steal time */
3405 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3406 READ_ONCE(halt_poll_max_steal)) {
3407 vcpu->stat.halt_no_poll_steal++;
/* Present only because KVM common code references the symbol. */
3413 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3415 /* kvm common code refers to this, but never calls it */
/*
 * Copy the value of a single s390 register to the user-space address in
 * reg->addr. Values come from the SIE block or vcpu->arch, or from
 * kvm_s390_get_cpu_timer() for the CPU timer. TODPR is written as a u32,
 * every other visible register as a u64. The put_user() result is kept
 * in r.
 */
3420 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3421 struct kvm_one_reg *reg)
3426 case KVM_REG_S390_TODPR:
3427 r = put_user(vcpu->arch.sie_block->todpr,
3428 (u32 __user *)reg->addr);
3430 case KVM_REG_S390_EPOCHDIFF:
3431 r = put_user(vcpu->arch.sie_block->epoch,
3432 (u64 __user *)reg->addr);
/* The CPU timer goes through the accessor instead of a raw field read. */
3434 case KVM_REG_S390_CPU_TIMER:
3435 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3436 (u64 __user *)reg->addr);
3438 case KVM_REG_S390_CLOCK_COMP:
3439 r = put_user(vcpu->arch.sie_block->ckc,
3440 (u64 __user *)reg->addr);
3442 case KVM_REG_S390_PFTOKEN:
3443 r = put_user(vcpu->arch.pfault_token,
3444 (u64 __user *)reg->addr);
3446 case KVM_REG_S390_PFCOMPARE:
3447 r = put_user(vcpu->arch.pfault_compare,
3448 (u64 __user *)reg->addr);
3450 case KVM_REG_S390_PFSELECT:
3451 r = put_user(vcpu->arch.pfault_select,
3452 (u64 __user *)reg->addr);
3454 case KVM_REG_S390_PP:
3455 r = put_user(vcpu->arch.sie_block->pp,
3456 (u64 __user *)reg->addr);
3458 case KVM_REG_S390_GBEA:
3459 r = put_user(vcpu->arch.sie_block->gbea,
3460 (u64 __user *)reg->addr);
/*
 * Load a single s390 register from the user-space address in reg->addr.
 * Most registers are read straight into their SIE block or vcpu->arch
 * field. The CPU timer is read into a local first and then applied with
 * kvm_s390_set_cpu_timer(). The get_user() result is kept in r.
 */
3469 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3470 struct kvm_one_reg *reg)
3476 case KVM_REG_S390_TODPR:
3477 r = get_user(vcpu->arch.sie_block->todpr,
3478 (u32 __user *)reg->addr);
3480 case KVM_REG_S390_EPOCHDIFF:
3481 r = get_user(vcpu->arch.sie_block->epoch,
3482 (u64 __user *)reg->addr);
3484 case KVM_REG_S390_CPU_TIMER:
3485 r = get_user(val, (u64 __user *)reg->addr);
3487 kvm_s390_set_cpu_timer(vcpu, val);
3489 case KVM_REG_S390_CLOCK_COMP:
3490 r = get_user(vcpu->arch.sie_block->ckc,
3491 (u64 __user *)reg->addr);
/* Setting the invalid token also clears the async-pf completion queue. */
3493 case KVM_REG_S390_PFTOKEN:
3494 r = get_user(vcpu->arch.pfault_token,
3495 (u64 __user *)reg->addr);
3496 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3497 kvm_clear_async_pf_completion_queue(vcpu);
3499 case KVM_REG_S390_PFCOMPARE:
3500 r = get_user(vcpu->arch.pfault_compare,
3501 (u64 __user *)reg->addr);
3503 case KVM_REG_S390_PFSELECT:
3504 r = get_user(vcpu->arch.pfault_select,
3505 (u64 __user *)reg->addr);
3507 case KVM_REG_S390_PP:
3508 r = get_user(vcpu->arch.sie_block->pp,
3509 (u64 __user *)reg->addr);
3511 case KVM_REG_S390_GBEA:
3512 r = get_user(vcpu->arch.sie_block->gbea,
3513 (u64 __user *)reg->addr);
/*
 * Normal CPU reset: clear PSW_MASK_RI in the guest PSW mask, invalidate
 * the pfault token, zero the riccb sync area, drop queued async-pf
 * completions, stop the vCPU unless user space controls the CPU state, and
 * clear pending local interrupts.
 */
3522 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3524 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3525 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3526 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3528 kvm_clear_async_pf_completion_queue(vcpu);
3529 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3530 kvm_s390_vcpu_stop(vcpu);
3531 kvm_s390_clear_local_irqs(vcpu);
/*
 * Initial CPU reset. Runs the normal reset first, then resets the guest
 * PSW, prefix, CPU timer, clock comparator and control registers in the
 * SIE block, and mirrors the reset values into the sync registers of
 * vcpu->run. The SIE block fields gbea, pp, the BPBC bit in fpf and todpr
 * are only reset when the vCPU is not protected.
 */
3534 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3536 /* Initial reset is a superset of the normal reset */
3537 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3540 * This equals initial cpu reset in pop, but we don't switch to ESA.
3541 * We do not only reset the internal data, but also ...
3543 vcpu->arch.sie_block->gpsw.mask = 0;
3544 vcpu->arch.sie_block->gpsw.addr = 0;
3545 kvm_s390_set_prefix(vcpu, 0);
3546 kvm_s390_set_cpu_timer(vcpu, 0);
3547 vcpu->arch.sie_block->ckc = 0;
/* All control registers are zeroed; CR0 and CR14 get their initial masks. */
3548 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3549 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3550 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3552 /* ... the data in sync regs */
3553 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3555 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3556 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3557 vcpu->run->psw_addr = 0;
3558 vcpu->run->psw_mask = 0;
3559 vcpu->run->s.regs.todpr = 0;
3560 vcpu->run->s.regs.cputm = 0;
/* Single reset of the sync-reg clock comparator. */
3561 vcpu->run->s.regs.ckc = 0;
3562 vcpu->run->s.regs.pp = 0;
3563 vcpu->run->s.regs.gbea = 1;
3564 vcpu->run->s.regs.fpc = 0;
3566 * Do not reset these registers in the protected case, as some of
3567 * them are overlaid and they are not accessible in this case
3570 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3571 vcpu->arch.sie_block->gbea = 1;
3572 vcpu->arch.sie_block->pp = 0;
3573 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3574 vcpu->arch.sie_block->todpr = 0;
/*
 * Clear CPU reset for one vcpu.
 *
 * Runs the initial reset and then zeroes the general purpose, vector and
 * access registers plus the guarded storage control block held in the
 * kvm_run sync register area, and clears etoken_extension.
 *
 * The address-of expressions on the local regs pointer had been mis-decoded
 * into a registered-trademark glyph during extraction; they are restored
 * here so the memset calls name a valid object again.
 */
3578 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3580 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3582 /* Clear reset is a superset of the initial reset */
3583 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3585 memset(&regs->gprs, 0, sizeof(regs->gprs));
3586 memset(&regs->vrs, 0, sizeof(regs->vrs));
3587 memset(&regs->acrs, 0, sizeof(regs->acrs));
3588 memset(&regs->gscb, 0, sizeof(regs->gscb));
3591 regs->etoken_extension = 0;
/*
 * Copy the general purpose registers supplied by the caller in @regs into
 * the gprs area of the kvm_run sync registers of @vcpu.
 *
 * The source operand of the memcpy had been mis-decoded into a
 * registered-trademark glyph during extraction; the address-of expression
 * on the regs parameter is restored here.
 */
3594 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3597 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
/*
 * Copy the general purpose registers from the gprs area of the kvm_run sync
 * registers of @vcpu into the caller supplied @regs.
 *
 * The destination operand of the memcpy had been mis-decoded into a
 * registered-trademark glyph during extraction; the address-of expression
 * on the regs parameter is restored here.
 */
3602 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3605 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
/*
 * Install special registers from @sregs: the access registers go into the
 * kvm_run sync register area, the control registers go into the gcr array
 * of the SIE block.
 */
3610 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3611 struct kvm_sregs *sregs)
3615 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3616 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
/*
 * Read back special registers into @sregs: the access registers come from
 * the kvm_run sync register area, the control registers come from the gcr
 * array of the SIE block.
 */
3622 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3623 struct kvm_sregs *sregs)
3627 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3628 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
/*
 * Install floating point state from @fpu into the kvm_run sync registers.
 *
 * The fpc value is checked with test_fp_ctl() before it is stored. The
 * floating point registers are either converted into the vector register
 * area via convert_fp_to_vx() or copied directly into the fprs area; the
 * condition selecting between the two is not visible in this excerpt.
 */
3634 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3640 if (test_fp_ctl(fpu->fpc)) {
3644 vcpu->run->s.regs.fpc = fpu->fpc;
3646 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3647 (freg_t *) fpu->fprs);
3649 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
/*
 * Read floating point state from the kvm_run sync registers into @fpu.
 *
 * The floating point registers are either converted out of the vector
 * register area via convert_vx_to_fp() or copied directly from the fprs
 * area (the selecting condition is not visible in this excerpt); the fpc
 * value is copied from the sync registers as well.
 */
3656 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3660 /* make sure we have the latest values */
3663 convert_vx_to_fp((freg_t *) fpu->fprs,
3664 (__vector128 *) vcpu->run->s.regs.vrs);
3666 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3667 fpu->fpc = vcpu->run->s.regs.fpc;
/*
 * Store the PSW passed by value in @psw into the psw_mask and psw_addr
 * fields of kvm_run. The function first tests is_vcpu_stopped(); the
 * statement taken when the vcpu is not stopped is not visible here.
 */
3673 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3677 if (!is_vcpu_stopped(vcpu))
3680 vcpu->run->psw_mask = psw.mask;
3681 vcpu->run->psw_addr = psw.addr;
/*
 * Address translation ioctl. Not implemented: the function ignores its
 * arguments and returns -EINVAL.
 */
3686 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3687 struct kvm_translation *tr)
3689 return -EINVAL; /* not implemented yet */
/*
 * VALID_GUESTDBG_FLAGS is the set of control bits accepted by the guest
 * debug ioctl below: single stepping, hardware breakpoints and enable.
 *
 * kvm_arch_vcpu_ioctl_set_guest_debug() starts by dropping any previous
 * debug state (guest_debug cleared, breakpoint data cleared). It tests the
 * requested control bits against VALID_GUESTDBG_FLAGS and tests
 * sclp.has_gpere; the statements taken on those tests are not visible here.
 * With KVM_GUESTDBG_ENABLE set, the control word is stored, CPUSTAT_P is set
 * and breakpoint data is imported when KVM_GUESTDBG_USE_HW_BP is requested.
 * The remaining visible statements clear CPUSTAT_P and last_bp, and reset
 * the debug state again (guest_debug, breakpoint data, CPUSTAT_P).
 */
3692 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3693 KVM_GUESTDBG_USE_HW_BP | \
3694 KVM_GUESTDBG_ENABLE)
3696 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3697 struct kvm_guest_debug *dbg)
3703 vcpu->guest_debug = 0;
3704 kvm_s390_clear_bp_data(vcpu);
3706 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3710 if (!sclp.has_gpere) {
3715 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3716 vcpu->guest_debug = dbg->control;
3717 /* enforce guest PER */
3718 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3720 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3721 rc = kvm_s390_import_bp_data(vcpu, dbg);
3723 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3724 vcpu->arch.guestdbg.last_bp = 0;
3728 vcpu->guest_debug = 0;
3729 kvm_s390_clear_bp_data(vcpu);
3730 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
/*
 * Report the multiprocessing state of @vcpu. Only two states are derived
 * here: KVM_MP_STATE_STOPPED when is_vcpu_stopped() is true, otherwise
 * KVM_MP_STATE_OPERATING.
 */
3738 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3739 struct kvm_mp_state *mp_state)
3745 /* CHECK_STOP and LOAD are not supported yet */
3746 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3747 KVM_MP_STATE_OPERATING;
/*
 * Set the multiprocessing state of @vcpu as requested in @mp_state.
 *
 * Calling this marks the VM as having userspace controlled cpu state.
 * STOPPED maps to kvm_s390_vcpu_stop(), OPERATING to kvm_s390_vcpu_start().
 * LOAD is forwarded to kvm_s390_pv_set_cpu_state() with
 * PV_CPU_STATE_OPR_LOAD; the statements guarded by the preceding
 * not-protected test are not visible here. CHECK_STOP falls through to code
 * that is not visible in this excerpt.
 */
3753 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3754 struct kvm_mp_state *mp_state)
3760 /* user space knows about this interface - let it control the state */
3761 kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3763 switch (mp_state->mp_state) {
3764 case KVM_MP_STATE_STOPPED:
3765 rc = kvm_s390_vcpu_stop(vcpu);
3767 case KVM_MP_STATE_OPERATING:
3768 rc = kvm_s390_vcpu_start(vcpu);
3770 case KVM_MP_STATE_LOAD:
3771 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3775 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3777 case KVM_MP_STATE_CHECK_STOP:
3778 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
/* Return whether the CPUSTAT_IBS cpu flag is currently set for @vcpu. */
3787 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3789 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
/*
 * Process the pending KVM requests of @vcpu before it enters the guest.
 *
 * Requests handled in the visible code:
 *  - KVM_REQ_MMU_RELOAD: re-arm the notifier on the two prefix pages via
 *    gmap_mprotect_notify(); the request is raised again in a branch whose
 *    condition is not visible here.
 *  - KVM_REQ_TLB_FLUSH: set ihcpu in the SIE block to 0xffff.
 *  - KVM_REQ_ENABLE_IBS / KVM_REQ_DISABLE_IBS: toggle CPUSTAT_IBS when the
 *    current state differs, emitting a trace event.
 *  - KVM_REQ_ICPT_OPEREXC: set ICTL_OPEREXC in the interception controls.
 *  - KVM_REQ_START_MIGRATION: clear ECB2_CMMA.
 *  - KVM_REQ_STOP_MIGRATION: set ECB2_CMMA again when the VM uses CMMA and
 *    the mm context reports uses_cmm.
 *  - KVM_REQ_UNHALT and KVM_REQ_VSIE_RESTART: just cleared.
 */
3792 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3795 kvm_s390_vcpu_request_handled(vcpu);
3796 if (!kvm_request_pending(vcpu))
3799 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3800 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3801 * This ensures that the ipte instruction for this request has
3802 * already finished. We might race against a second unmapper that
3803 * wants to set the blocking bit. Lets just retry the request loop.
3805 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3807 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3808 kvm_s390_get_prefix(vcpu),
3809 PAGE_SIZE * 2, PROT_WRITE);
3811 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3817 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3818 vcpu->arch.sie_block->ihcpu = 0xffff;
3822 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3823 if (!ibs_enabled(vcpu)) {
3824 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3825 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3830 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3831 if (ibs_enabled(vcpu)) {
3832 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3833 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3838 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3839 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3843 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3845 * Disable CMM virtualization; we will emulate the ESSA
3846 * instruction manually, in order to provide additional
3847 * functionalities needed for live migration.
3849 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3853 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3855 * Re-enable CMM virtualization if CMMA is available and
3856 * CMM has been used.
3858 if ((vcpu->kvm->arch.use_cmma) &&
3859 (vcpu->kvm->mm->context.uses_cmm))
3860 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3864 /* nothing to do, just clear the request */
3865 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3866 /* we left the vsie handler, nothing to do, just clear the request */
3867 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
/*
 * Set the guest TOD clock of @kvm to the value described by @gtod.
 *
 * Under kvm->lock the host TOD is sampled with store_tod_clock_ext() and
 * the VM epoch is computed as guest TOD minus host TOD. When facility 139
 * is available the epoch index is computed the same way and decremented by
 * one if the epoch subtraction wrapped (epoch greater than the guest TOD).
 * All vcpus are blocked while the new epoch and epdx values are copied into
 * each SIE block, then unblocked again.
 */
3872 void kvm_s390_set_tod_clock(struct kvm *kvm,
3873 const struct kvm_s390_vm_tod_clock *gtod)
3875 struct kvm_vcpu *vcpu;
3876 union tod_clock clk;
3879 mutex_lock(&kvm->lock);
3882 store_tod_clock_ext(&clk);
3884 kvm->arch.epoch = gtod->tod - clk.tod;
3886 if (test_kvm_facility(kvm, 139)) {
3887 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3888 if (kvm->arch.epoch > gtod->tod)
3889 kvm->arch.epdx -= 1;
3892 kvm_s390_vcpu_block_all(kvm);
3893 kvm_for_each_vcpu(i, vcpu, kvm) {
3894 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3895 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3898 kvm_s390_vcpu_unblock_all(kvm);
3900 mutex_unlock(&kvm->lock);
/*
 * Thin wrapper around gmap_fault() on the gmap of the vcpu: @gpa is passed
 * through unchanged and @writable selects FAULT_FLAG_WRITE or no flags.
 * The result of gmap_fault() is returned as is.
 */
3904 * kvm_arch_fault_in_page - fault-in guest page if necessary
3905 * @vcpu: The corresponding virtual cpu
3906 * @gpa: Guest physical address
3907 * @writable: Whether the page should be writable or not
3909 * Make sure that a guest page has been faulted-in on the host.
3911 * Return: Zero on success, negative error code otherwise.
3913 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3915 return gmap_fault(vcpu->arch.gmap, gpa,
3916 writable ? FAULT_FLAG_WRITE : 0);
/*
 * Inject a pfault notification carrying @token.
 *
 * Two injection paths are visible: a KVM_S390_INT_PFAULT_INIT interrupt
 * injected into the vcpu with the token in ext_params2, and a
 * KVM_S390_INT_PFAULT_DONE interrupt injected into the VM with the token in
 * parm64. Presumably @start_token selects between them; the selecting
 * statement is not visible in this excerpt. A failing injection triggers
 * WARN_ON_ONCE.
 */
3919 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3920 unsigned long token)
3922 struct kvm_s390_interrupt inti;
3923 struct kvm_s390_irq irq;
3926 irq.u.ext.ext_params2 = token;
3927 irq.type = KVM_S390_INT_PFAULT_INIT;
3928 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3930 inti.type = KVM_S390_INT_PFAULT_DONE;
3931 inti.parm64 = token;
3932 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
/*
 * Async pfault hook for a page that is not present: emits the pfault init
 * trace event and injects the token stored in @work with start_token set
 * to true.
 */
3936 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3937 struct kvm_async_pf *work)
3939 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3940 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
/*
 * Async pfault hook for a page that became present: emits the pfault done
 * trace event and injects the token stored in @work with start_token set
 * to false.
 */
3945 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3946 struct kvm_async_pf *work)
3948 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3949 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
/* Async pfault page-ready hook; no statements are visible for s390. */
3952 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3953 struct kvm_async_pf *work)
3955 /* s390 will always inject the page directly */
/*
 * Tells the generic async pfault code whether completed work may be
 * dequeued for @vcpu. The return statement is not visible in this excerpt.
 */
3958 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3961 * s390 will always inject the page directly,
3962 * but we still want check_async_completion to cleanup
/*
 * Try to set up an asynchronous pfault for the address recorded in
 * current->thread.gmap_addr.
 *
 * A series of preconditions is tested first: a valid pfault token, the PSW
 * mask matching the pfault select and compare values, external interrupts
 * not disabled, no pending interrupt, the service signal submask set in
 * CR0 and pfault enabled on the gmap. The statements taken when a test
 * fails are not visible here. The host virtual address is then derived
 * from the faulting guest address, the 8 byte token is read from guest
 * real memory and the work is queued through kvm_setup_async_pf().
 */
3967 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3970 struct kvm_arch_async_pf arch;
3972 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3974 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3975 vcpu->arch.pfault_compare)
3977 if (psw_extint_disabled(vcpu))
3979 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3981 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3983 if (!vcpu->arch.gmap->pfault_enabled)
3986 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3987 hva += current->thread.gmap_addr & ~PAGE_MASK;
3988 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3991 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
/*
 * Prepare @vcpu for entering SIE.
 *
 * Visible steps, in order: run the async pfault completion housekeeping,
 * copy gprs 14 and 15 from the sync registers into gg14 and gg15 of the
 * SIE block, deliver pending interrupts (skipped for ucontrol VMs), handle
 * pending requests, back up and patch the guest PER registers when guest
 * debugging is enabled, clear this vcpu in the GISA kicked mask, reset the
 * interception code and trace the SIE entry together with the cpu flags.
 */
3994 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3999 * On s390 notifications for arriving pages will be delivered directly
4000 * to the guest but the house keeping for completed pfaults is
4001 * handled outside the worker.
4003 kvm_check_async_pf_completion(vcpu);
4005 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4006 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4011 if (!kvm_is_ucontrol(vcpu->kvm)) {
4012 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4017 rc = kvm_s390_handle_requests(vcpu);
4021 if (guestdbg_enabled(vcpu)) {
4022 kvm_s390_backup_guest_per_regs(vcpu);
4023 kvm_s390_patch_guest_per_regs(vcpu);
4026 clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4028 vcpu->arch.sie_block->icptcode = 0;
4029 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4030 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4031 trace_kvm_s390_sie_enter(vcpu, cpuflags);
/*
 * Handle a fault that hit the SIE instruction itself by injecting a
 * program interrupt into the guest.
 *
 * The default program info carries PGM_ADDRESSING. One opcode byte is read
 * at the guest PSW address to derive the instruction length via
 * insn_length(); in a path whose condition is not visible here the program
 * info is instead taken from vcpu->arch.pgm. The length is stored in the
 * flags together with KVM_S390_PGM_FLAGS_ILC_VALID, the PSW is forwarded by
 * that length and the result of kvm_s390_inject_prog_irq() is returned.
 */
4036 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4038 struct kvm_s390_pgm_info pgm_info = {
4039 .code = PGM_ADDRESSING,
4044 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4045 trace_kvm_s390_sie_fault(vcpu);
4048 * We want to inject an addressing exception, which is defined as a
4049 * suppressing or terminating exception. However, since we came here
4050 * by a DAT access exception, the PSW still points to the faulting
4051 * instruction since DAT exceptions are nullifying. So we've got
4052 * to look up the current opcode to get the length of the instruction
4053 * to be able to forward the PSW.
4055 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4056 ilen = insn_length(opcode);
4060 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4061 * Forward by arbitrary ilc, injection will take care of
4062 * nullification if necessary.
4064 pgm_info = vcpu->arch.pgm;
4067 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4068 kvm_s390_forward_psw(vcpu, ilen);
4069 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
/*
 * Post-process one SIE exit of @vcpu; @exit_reason is the value returned by
 * the SIE entry routine.
 *
 * Always: trace the exit, restore the guest PER registers when guest
 * debugging is enabled and copy gg14 and gg15 back into gprs 14 and 15.
 *
 * Then, by case:
 *  - exit_reason == -EINTR: reinject the machine check information stored
 *    in the sie_page that contains the SIE block.
 *  - icptcode > 0: let kvm_handle_sie_intercept() handle it; when that
 *    returns -EOPNOTSUPP the intercept data (icptcode, ipa, ipb) is handed
 *    to userspace as KVM_EXIT_S390_SIEIC.
 *  - exit_reason != -EFAULT: count a null exit.
 *  - ucontrol VM: report KVM_EXIT_S390_UCONTROL with the faulting address
 *    and pgm_code 0x10.
 *  - gmap_pfault set: try an async pfault, otherwise count a synchronous
 *    pfault and fault the page in for writing.
 *  - anything else: treat it as a fault in the SIE instruction.
 */
4072 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4074 struct mcck_volatile_info *mcck_info;
4075 struct sie_page *sie_page;
4077 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4078 vcpu->arch.sie_block->icptcode);
4079 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4081 if (guestdbg_enabled(vcpu))
4082 kvm_s390_restore_guest_per_regs(vcpu);
4084 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4085 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4087 if (exit_reason == -EINTR) {
4088 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4089 sie_page = container_of(vcpu->arch.sie_block,
4090 struct sie_page, sie_block);
4091 mcck_info = &sie_page->mcck_info;
4092 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4096 if (vcpu->arch.sie_block->icptcode > 0) {
4097 int rc = kvm_handle_sie_intercept(vcpu);
4099 if (rc != -EOPNOTSUPP)
4101 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4102 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4103 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4104 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4106 } else if (exit_reason != -EFAULT) {
4107 vcpu->stat.exit_null++;
4109 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4110 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4111 vcpu->run->s390_ucontrol.trans_exc_code =
4112 current->thread.gmap_addr;
4113 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4115 } else if (current->thread.gmap_pfault) {
4116 trace_kvm_s390_major_guest_pfault(vcpu);
4117 current->thread.gmap_pfault = 0;
4118 if (kvm_arch_setup_async_pf(vcpu))
4120 vcpu->stat.pfault_sync++;
4121 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4123 return vcpu_post_run_fault_in_sie(vcpu);
/*
 * PSW_INT_MASK combines the external, I/O and machine check mask bits of
 * the PSW.
 *
 * __vcpu_run() is the inner run loop of a vcpu. kvm->srcu is held for
 * reading around the loop and dropped while the guest runs. Each iteration
 * calls vcpu_pre_run(), enters guest context with interrupts disabled,
 * switches cpu timer accounting, runs the guest through sie64a() and then
 * calls vcpu_post_run() with the exit reason. For protected vcpus the gprs
 * are copied into and out of pv_grregs of the sie_page around the SIE
 * entry, and the PSW_INT_MASK bits are cleared in the PSW copy after an
 * ICPT_PV_INSTR or ICPT_PV_PREF intercept. The loop ends on a pending
 * signal, a pending guest debug exit or a nonzero rc.
 */
4126 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4127 static int __vcpu_run(struct kvm_vcpu *vcpu)
4129 int rc, exit_reason;
4130 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4133 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4134 * ning the guest), so that memslots (and other stuff) are protected
4136 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4139 rc = vcpu_pre_run(vcpu);
4143 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4145 * As PF_VCPU will be used in fault handler, between
4146 * guest_enter and guest_exit should be no uaccess.
4148 local_irq_disable();
4149 guest_enter_irqoff();
4150 __disable_cpu_timer_accounting(vcpu);
4152 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4153 memcpy(sie_page->pv_grregs,
4154 vcpu->run->s.regs.gprs,
4155 sizeof(sie_page->pv_grregs));
4157 if (test_cpu_flag(CIF_FPU))
4159 exit_reason = sie64a(vcpu->arch.sie_block,
4160 vcpu->run->s.regs.gprs);
4161 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4162 memcpy(vcpu->run->s.regs.gprs,
4163 sie_page->pv_grregs,
4164 sizeof(sie_page->pv_grregs));
4166 * We're not allowed to inject interrupts on intercepts
4167 * that leave the guest state in an "in-between" state
4168 * where the next SIE entry will do a continuation.
4169 * Fence interrupts in our "internal" PSW.
4171 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4172 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4173 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4176 local_irq_disable();
4177 __enable_cpu_timer_accounting(vcpu);
4178 guest_exit_irqoff();
4180 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4182 rc = vcpu_post_run(vcpu, exit_reason);
4183 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4185 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * Copy the sync register state that is only handled for non-protected
 * vcpus from kvm_run into the vcpu (sync_regs() calls this only when the
 * vcpu is not protected).
 *
 * The PSW is always copied. The remaining groups are copied only when the
 * matching bit is set in kvm_dirty_regs: ARCH0 (todpr, pp, gbea), PFAULT
 * (token, select, compare; an invalid token also clears the async pfault
 * completion queue), DIAG318 (diag318 info and cpnc), BPBC (FPF_BPBC flag,
 * requires facility 82). A dirty riccb with facility 64 enables ECB3_RI and
 * a dirty gscb with facility 133 enables guarded storage (ECB_GS,
 * ECD_HOSTREGMGMT, gs_enabled); each of these two tests has one further
 * condition that is not visible in this excerpt. On machines with guarded
 * storage the host gs control block is saved and, when guarded storage is
 * enabled for the guest, the guest block from kvm_run is loaded.
 */
4189 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4191 struct kvm_run *kvm_run = vcpu->run;
4192 struct runtime_instr_cb *riccb;
4195 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4196 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4197 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4198 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4199 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4200 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4201 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4202 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4204 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4205 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4206 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4207 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4208 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4209 kvm_clear_async_pf_completion_queue(vcpu);
4211 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4212 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4213 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4214 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4217 * If userspace sets the riccb (e.g. after migration) to a valid state,
4218 * we should enable RI here instead of doing the lazy enablement.
4220 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4221 test_kvm_facility(vcpu->kvm, 64) &&
4223 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4224 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4225 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4228 * If userspace sets the gscb (e.g. after migration) to non-zero,
4229 * we should enable GS here instead of doing the lazy enablement.
4231 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4232 test_kvm_facility(vcpu->kvm, 133) &&
4234 !vcpu->arch.gs_enabled) {
4235 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4236 vcpu->arch.sie_block->ecb |= ECB_GS;
4237 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4238 vcpu->arch.gs_enabled = 1;
4240 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4241 test_kvm_facility(vcpu->kvm, 82)) {
4242 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4243 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4245 if (MACHINE_HAS_GS) {
4247 __ctl_set_bit(2, 4);
4248 if (current->thread.gs_cb) {
4249 vcpu->arch.host_gscb = current->thread.gs_cb;
4250 save_gs_cb(vcpu->arch.host_gscb);
4252 if (vcpu->arch.gs_enabled) {
4253 current->thread.gs_cb = (struct gs_cb *)
4254 &vcpu->run->s.regs.gscb;
4255 restore_gs_cb(current->thread.gs_cb);
4259 /* SIE will load etoken directly from SDNX and therefore kvm_run */
/*
 * Load the register state userspace placed in kvm_run into the vcpu before
 * running it.
 *
 * Dirty-bit guarded: the prefix, the control registers (which also raises a
 * TLB flush request) and ARCH0 (cpu timer and clock comparator).
 * Unconditional: host access registers are saved and the guest ones
 * loaded; the host fpc and fpu register pointer are remembered and
 * current->thread.fpu is pointed at the guest vrs or fprs area (the
 * selecting condition is not visible here) with the guest fpc, which is
 * forced to 0 if test_fp_ctl() rejects it.
 * Non-protected vcpus additionally go through sync_regs_fmt2(); otherwise
 * PSW_MASK_CC is cleared in the PSW copy and bits from the userspace
 * psw_mask are merged in (the mask applied to them is not visible here).
 * kvm_dirty_regs is reset to 0 at the end.
 */
4262 static void sync_regs(struct kvm_vcpu *vcpu)
4264 struct kvm_run *kvm_run = vcpu->run;
4266 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4267 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4268 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4269 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4270 /* some control register changes require a tlb flush */
4271 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4273 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4274 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4275 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4277 save_access_regs(vcpu->arch.host_acrs);
4278 restore_access_regs(vcpu->run->s.regs.acrs);
4279 /* save host (userspace) fprs/vrs */
4281 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4282 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4284 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4286 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4287 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4288 if (test_fp_ctl(current->thread.fpu.fpc))
4289 /* User space provided an invalid FPC, let's clear it */
4290 current->thread.fpu.fpc = 0;
4292 /* Sync fmt2 only data */
4293 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4294 sync_regs_fmt2(vcpu);
4297 * In several places we have to modify our internal view to
4298 * not do things that are disallowed by the ultravisor. For
4299 * example we must not inject interrupts after specific exits
4300 * (e.g. 112 prefix page not secure). We do this by turning
4301 * off the machine check, external and I/O interrupt bits
4302 * of our PSW copy. To avoid getting validity intercepts, we
4303 * do only accept the condition code from userspace.
4305 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4306 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4310 kvm_run->kvm_dirty_regs = 0;
/*
 * Copy the state that is only handled for non-protected vcpus from the vcpu
 * back into kvm_run (store_regs() calls this only when the vcpu is not
 * protected): todpr, pp, gbea, the bpbc flag derived from FPF_BPBC and the
 * diag318 value.
 *
 * On machines with guarded storage the guest gs control block is saved when
 * guarded storage is enabled for the guest, the host block is reinstalled
 * as current->thread.gs_cb and restored, control register 2 bit 4 is
 * cleared again when there was no host block, and host_gscb is reset to
 * NULL.
 */
4313 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4315 struct kvm_run *kvm_run = vcpu->run;
4317 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4318 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4319 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4320 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4321 kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4322 if (MACHINE_HAS_GS) {
4324 __ctl_set_bit(2, 4);
4325 if (vcpu->arch.gs_enabled)
4326 save_gs_cb(current->thread.gs_cb);
4327 current->thread.gs_cb = vcpu->arch.host_gscb;
4328 restore_gs_cb(vcpu->arch.host_gscb);
4329 if (!vcpu->arch.host_gscb)
4330 __ctl_clear_bit(2, 4);
4331 vcpu->arch.host_gscb = NULL;
4334 /* SIE will save etoken directly into SDNX and therefore kvm_run */
/*
 * Write the vcpu register state back into kvm_run after running the guest.
 *
 * Stored: PSW, prefix, control registers, cpu timer, clock comparator and
 * the three pfault values. The guest access registers are saved into
 * kvm_run and the host ones restored; the guest fpc is saved and the host
 * fpc and fpu register pointer are put back into current->thread.fpu.
 * Non-protected vcpus additionally go through store_regs_fmt2().
 */
4337 static void store_regs(struct kvm_vcpu *vcpu)
4339 struct kvm_run *kvm_run = vcpu->run;
4341 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4342 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4343 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4344 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4345 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4346 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4347 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4348 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4349 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4350 save_access_regs(vcpu->run->s.regs.acrs);
4351 restore_access_regs(vcpu->arch.host_acrs);
4352 /* Save guest register state */
4354 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4355 /* Restore will be done lazily at return */
4356 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4357 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4358 if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4359 store_regs_fmt2(vcpu);
/*
 * Entry point of the vcpu run ioctl.
 *
 * Visible flow: test immediate_exit, test kvm_valid_regs and kvm_dirty_regs
 * for bits outside KVM_SYNC_S390_VALID_FIELDS, and prepare a debug exit if
 * one is already pending (the statements taken on the first two tests are
 * not visible here). The signal mask is then activated. When userspace
 * does not control the cpu state the vcpu is started implicitly; otherwise
 * a stopped vcpu is reported with a rate limited error message. Cpu timer
 * accounting is enabled around __vcpu_run(). Afterwards a pending signal
 * with rc == 0 yields KVM_EXIT_INTR, a pending debug exit with rc == 0 is
 * prepared, and -EREMOTE is tested for. Finally the signal mask is
 * deactivated and the exit_userspace statistic is incremented.
 */
4362 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4364 struct kvm_run *kvm_run = vcpu->run;
4367 if (kvm_run->immediate_exit)
4370 if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4371 kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4376 if (guestdbg_exit_pending(vcpu)) {
4377 kvm_s390_prepare_debug_exit(vcpu);
4382 kvm_sigset_activate(vcpu);
4385 * no need to check the return value of vcpu_start as it can only have
4386 * an error for protvirt, but protvirt means user cpu state
4388 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4389 kvm_s390_vcpu_start(vcpu);
4390 } else if (is_vcpu_stopped(vcpu)) {
4391 pr_err_ratelimited("can't run stopped vcpu %d\n",
4398 enable_cpu_timer_accounting(vcpu);
4401 rc = __vcpu_run(vcpu);
4403 if (signal_pending(current) && !rc) {
4404 kvm_run->exit_reason = KVM_EXIT_INTR;
4408 if (guestdbg_exit_pending(vcpu) && !rc) {
4409 kvm_s390_prepare_debug_exit(vcpu);
4413 if (rc == -EREMOTE) {
4414 /* userspace support is needed, kvm_run has been prepared */
4418 disable_cpu_timer_accounting(vcpu);
4421 kvm_sigset_deactivate(vcpu);
4423 vcpu->stat.exit_userspace++;
/*
 * Store the architected status of @vcpu into guest memory at @gpa, using
 * the register copies held in kvm_run and the SIE block.
 *
 * For KVM_S390_STORE_STATUS_NOADDR the architecture mode byte is written to
 * absolute address 163, for KVM_S390_STORE_STATUS_PREFIXED to real address
 * 163; the base address chosen in those two cases is not visible in this
 * excerpt. For any other @gpa, __LC_FPREGS_SAVE_AREA is subtracted so the
 * save area offsets below can be added unchanged. On machines with the
 * vector facility the floating point registers are first converted out of
 * the vector registers.
 *
 * The individual write results are OR-ed together; the function returns
 * -EFAULT if any of them failed and 0 otherwise.
 */
4430 * store status at address
4431 * we use have two special cases:
4432 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4433 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4435 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4437 unsigned char archmode = 1;
4438 freg_t fprs[NUM_FPRS];
4443 px = kvm_s390_get_prefix(vcpu);
4444 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4445 if (write_guest_abs(vcpu, 163, &archmode, 1))
4448 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4449 if (write_guest_real(vcpu, 163, &archmode, 1))
4453 gpa -= __LC_FPREGS_SAVE_AREA;
4455 /* manually convert vector registers if necessary */
4456 if (MACHINE_HAS_VX) {
4457 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4458 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4461 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4462 vcpu->run->s.regs.fprs, 128);
4464 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4465 vcpu->run->s.regs.gprs, 128);
4466 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4467 &vcpu->arch.sie_block->gpsw, 16);
4468 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4470 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4471 &vcpu->run->s.regs.fpc, 4);
4472 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4473 &vcpu->arch.sie_block->todpr, 4);
4474 cputm = kvm_s390_get_cpu_timer(vcpu);
4475 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4477 clkcomp = vcpu->arch.sie_block->ckc >> 8;
4478 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4480 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4481 &vcpu->run->s.regs.acrs, 64);
4482 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4483 &vcpu->arch.sie_block->gcr, 128);
4484 return rc ? -EFAULT : 0;
/*
 * Store status for a vcpu whose guest registers are currently loaded in the
 * host registers: refresh the fpc and access register copies in kvm_run
 * first, then delegate to kvm_s390_store_status_unloaded() with @addr.
 */
4487 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4490 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4491 * switch in the run ioctl. Let's update our copies before we save
4492 * it into the save area
4495 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4496 save_access_regs(vcpu->run->s.regs.acrs);
4498 return kvm_s390_store_status_unloaded(vcpu, addr);
/*
 * Request that IBS be disabled on @vcpu: consume a possibly pending
 * KVM_REQ_ENABLE_IBS first, then issue KVM_REQ_DISABLE_IBS through
 * kvm_s390_sync_request().
 */
4501 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4503 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4504 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
/* Call __disable_ibs_on_vcpu() for every vcpu of @kvm. */
4507 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4510 struct kvm_vcpu *vcpu;
4512 kvm_for_each_vcpu(i, vcpu, kvm) {
4513 __disable_ibs_on_vcpu(vcpu);
/*
 * Request that IBS be enabled on @vcpu: consume a possibly pending
 * KVM_REQ_DISABLE_IBS first, then issue KVM_REQ_ENABLE_IBS through
 * kvm_s390_sync_request().
 */
4517 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4521 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4522 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
/*
 * Move @vcpu out of the STOPPED state.
 *
 * The state change is serialized by start_stop_lock. For protected vcpus
 * the ultravisor is first told about the switch to the operating state;
 * the lock is released again on a path whose condition is not visible
 * here. The other online vcpus are scanned for non-stopped ones: with none
 * running, IBS is enabled on this vcpu; with exactly one running, IBS is
 * disabled on all vcpus. CPUSTAT_STOPPED is then cleared, the PSW_INT_MASK
 * bits are cleared in the PSW copy of protected vcpus, and a TLB flush is
 * requested before the lock is dropped.
 */
4525 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4527 int i, online_vcpus, r = 0, started_vcpus = 0;
4529 if (!is_vcpu_stopped(vcpu))
4532 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4533 /* Only one cpu at a time may enter/leave the STOPPED state. */
4534 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4535 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4537 /* Let's tell the UV that we want to change into the operating state */
4538 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4539 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4541 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4546 for (i = 0; i < online_vcpus; i++) {
4547 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4551 if (started_vcpus == 0) {
4552 /* we're the only active VCPU -> speed it up */
4553 __enable_ibs_on_vcpu(vcpu);
4554 } else if (started_vcpus == 1) {
4556 * As we are starting a second VCPU, we have to disable
4557 * the IBS facility on all VCPUs to remove potentially
4558 * outstanding ENABLE requests.
4560 __disable_ibs_on_all_vcpus(vcpu->kvm);
4563 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4565 * The real PSW might have changed due to a RESTART interpreted by the
4566 * ultravisor. We block all interrupts and let the next sie exit
4569 if (kvm_s390_pv_cpu_is_protected(vcpu))
4570 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4572 * Another VCPU might have used IBS while we were offline.
4573 * Let's play safe and flush the VCPU at startup.
4575 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4576 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Move @vcpu into the STOPPED state.
 *
 * The state change is serialized by start_stop_lock. For protected vcpus
 * the ultravisor is first told about the switch to the stopped state; the
 * lock is released again on a path whose condition is not visible here.
 * CPUSTAT_STOPPED is set before the stop interrupt is cleared, and IBS is
 * disabled on this vcpu. The online vcpus are then scanned for non-stopped
 * ones; when exactly one is left running, IBS is enabled on that vcpu
 * before the lock is dropped.
 */
4580 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4582 int i, online_vcpus, r = 0, started_vcpus = 0;
4583 struct kvm_vcpu *started_vcpu = NULL;
4585 if (is_vcpu_stopped(vcpu))
4588 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4589 /* Only one cpu at a time may enter/leave the STOPPED state. */
4590 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4591 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4593 /* Let's tell the UV that we want to change into the stopped state */
4594 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4595 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4597 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4603 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4604 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4605 * have been fully processed. This will ensure that the VCPU
4606 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4608 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4609 kvm_s390_clear_stop_irq(vcpu);
4611 __disable_ibs_on_vcpu(vcpu);
4613 for (i = 0; i < online_vcpus; i++) {
4614 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4616 if (!is_vcpu_stopped(tmp)) {
4622 if (started_vcpus == 1) {
4624 * As we only have one VCPU left, we want to enable the
4625 * IBS facility for that VCPU to speed it up.
4627 __enable_ibs_on_vcpu(started_vcpu);
4630 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
/*
 * Enable a capability requested through the vcpu enable-cap ioctl. The only
 * case visible here is KVM_CAP_S390_CSS_SUPPORT, which sets the VM wide
 * css_support flag once and logs and traces the change.
 */
4634 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4635 struct kvm_enable_cap *cap)
4643 case KVM_CAP_S390_CSS_SUPPORT:
4644 if (!vcpu->kvm->arch.css_support) {
4645 vcpu->kvm->arch.css_support = 1;
4646 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4647 trace_kvm_s390_enable_css(vcpu->kvm);
/*
 * Copy data between a userspace buffer and the SIDA of @vcpu as described
 * by @mop.
 *
 * Three checks come first: any flag set or a zero size, wrap-around of
 * size + sida_offset, and size + sida_offset exceeding the SIDA size (the
 * statements taken when a check fails are not visible here).
 * KVM_S390_MEMOP_SIDA_READ copies from the SIDA to userspace,
 * KVM_S390_MEMOP_SIDA_WRITE copies from userspace into the SIDA, both at
 * sida_offset from the SIDA origin.
 */
4658 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4659 struct kvm_s390_mem_op *mop)
4661 void __user *uaddr = (void __user *)mop->buf;
4664 if (mop->flags || !mop->size)
4666 if (mop->size + mop->sida_offset < mop->size)
4668 if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4672 case KVM_S390_MEMOP_SIDA_READ:
4673 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4674 mop->sida_offset), mop->size))
4678 case KVM_S390_MEMOP_SIDA_WRITE:
4679 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4680 mop->sida_offset), uaddr, mop->size))
/*
 * Read or write guest logical memory on behalf of userspace as described by
 * @mop.
 *
 * Checks come first: flags outside INJECT_EXCEPTION and CHECK_ONLY, an
 * access register number of NUM_ACRS or more, a zero size, a size above
 * MEM_OP_MAX_SIZE, and a protected vcpu (the statements taken when a check
 * fails are not visible here). Unless CHECK_ONLY is set, a bounce buffer of
 * mop->size bytes is allocated with vmalloc().
 *
 * LOGICAL_READ either only checks the range for fetch access or reads the
 * guest memory into the bounce buffer and copies it to userspace.
 * LOGICAL_WRITE either only checks the range for store access or copies
 * from userspace into the bounce buffer and writes it to the guest. When
 * the result is positive and INJECT_EXCEPTION is set, the program interrupt
 * recorded in vcpu->arch.pgm is injected.
 */
4686 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4687 struct kvm_s390_mem_op *mop)
4689 void __user *uaddr = (void __user *)mop->buf;
4690 void *tmpbuf = NULL;
4692 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4693 | KVM_S390_MEMOP_F_CHECK_ONLY;
4695 if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4698 if (mop->size > MEM_OP_MAX_SIZE)
4701 if (kvm_s390_pv_cpu_is_protected(vcpu))
4704 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4705 tmpbuf = vmalloc(mop->size);
4711 case KVM_S390_MEMOP_LOGICAL_READ:
4712 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4713 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4714 mop->size, GACC_FETCH);
4717 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4719 if (copy_to_user(uaddr, tmpbuf, mop->size))
4723 case KVM_S390_MEMOP_LOGICAL_WRITE:
4724 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4725 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4726 mop->size, GACC_STORE);
4729 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4733 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4737 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4738 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
/*
 * Dispatch a memory operation to the matching handler while holding
 * kvm->srcu for reading: logical read and write go to
 * kvm_s390_guest_mem_op(), SIDA read and write go to
 * kvm_s390_guest_sida_op().
 */
4744 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4745 struct kvm_s390_mem_op *mop)
4749 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4752 case KVM_S390_MEMOP_LOGICAL_READ:
4753 case KVM_S390_MEMOP_LOGICAL_WRITE:
4754 r = kvm_s390_guest_mem_op(vcpu, mop);
4756 case KVM_S390_MEMOP_SIDA_READ:
4757 case KVM_S390_MEMOP_SIDA_WRITE:
4758 /* we are locked against sida going away by the vcpu->mutex */
4759 r = kvm_s390_guest_sida_op(vcpu, mop);
4765 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
/*
 * Asynchronous vcpu ioctls, both of which inject an interrupt into the
 * vcpu stored in filp->private_data.
 *
 * KVM_S390_IRQ copies a struct kvm_s390_irq from userspace and injects it.
 * KVM_S390_INTERRUPT copies the older struct kvm_s390_interrupt, converts
 * it with s390int_to_s390irq() and injects the result. The statements taken
 * when a copy or the conversion fails are not visible here. The visible
 * fallback return value is -ENOIOCTLCMD.
 */
4769 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4770 unsigned int ioctl, unsigned long arg)
4772 struct kvm_vcpu *vcpu = filp->private_data;
4773 void __user *argp = (void __user *)arg;
4776 case KVM_S390_IRQ: {
4777 struct kvm_s390_irq s390irq;
4779 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4781 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4783 case KVM_S390_INTERRUPT: {
4784 struct kvm_s390_interrupt s390int;
4785 struct kvm_s390_irq s390irq = {};
4787 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4789 if (s390int_to_s390irq(&s390int, &s390irq))
4791 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4794 return -ENOIOCTLCMD;
/*
 * s390 vcpu ioctl dispatcher.
 *
 * @filp:  file whose private_data holds the target struct kvm_vcpu
 * @ioctl: ioctl request number
 * @arg:   request argument; passed on directly as a value
 *         (KVM_S390_STORE_STATUS, KVM_S390_VCPU_FAULT) or used as a
 *         user-space pointer (argp) to the request structure
 *
 * Handles KVM_S390_STORE_STATUS, KVM_S390_SET_INITIAL_PSW, the clear,
 * initial and normal vcpu resets, KVM_SET_ONE_REG / KVM_GET_ONE_REG,
 * KVM_S390_UCAS_MAP / KVM_S390_UCAS_UNMAP (guarded by
 * CONFIG_KVM_S390_UCONTROL), KVM_S390_VCPU_FAULT, KVM_ENABLE_CAP,
 * KVM_S390_MEM_OP and KVM_S390_SET_IRQ_STATE / KVM_S390_GET_IRQ_STATE.
 * Each handler stores its result in r.
 */
4797 long kvm_arch_vcpu_ioctl(struct file *filp,
4798 unsigned int ioctl, unsigned long arg)
4800 struct kvm_vcpu *vcpu = filp->private_data;
4801 void __user *argp = (void __user *)arg;
4809 case KVM_S390_STORE_STATUS:
/* the status store runs under the kvm->srcu read lock */
4810 idx = srcu_read_lock(&vcpu->kvm->srcu);
4811 r = kvm_s390_store_status_unloaded(vcpu, arg);
4812 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4814 case KVM_S390_SET_INITIAL_PSW: {
4818 if (copy_from_user(&psw, argp, sizeof(psw)))
4820 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4823 case KVM_S390_CLEAR_RESET:
4825 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
/* protected vcpus additionally get UVC_CMD_CPU_RESET_CLEAR via uv_cmd_nodata() */
4826 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4827 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4828 UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4829 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4833 case KVM_S390_INITIAL_RESET:
4835 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
/* protected vcpus additionally get UVC_CMD_CPU_RESET_INITIAL via uv_cmd_nodata() */
4836 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838 UVC_CMD_CPU_RESET_INITIAL,
4840 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4844 case KVM_S390_NORMAL_RESET:
4846 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
/* protected vcpus additionally get UVC_CMD_CPU_RESET via uv_cmd_nodata() */
4847 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4848 r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4849 UVC_CMD_CPU_RESET, &rc, &rrc);
4850 VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4854 case KVM_SET_ONE_REG:
4855 case KVM_GET_ONE_REG: {
4856 struct kvm_one_reg reg;
/* the protected-vcpu check comes before the register descriptor is read */
4858 if (kvm_s390_pv_cpu_is_protected(vcpu))
4861 if (copy_from_user(&reg, argp, sizeof(reg)))
/* one descriptor type serves both directions; the request number selects set or get */
4863 if (ioctl == KVM_SET_ONE_REG)
4864 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4866 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4869 #ifdef CONFIG_KVM_S390_UCONTROL
4870 case KVM_S390_UCAS_MAP: {
4871 struct kvm_s390_ucas_mapping ucasmap;
4873 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4878 if (!kvm_is_ucontrol(vcpu->kvm)) {
4883 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4884 ucasmap.vcpu_addr, ucasmap.length);
4887 case KVM_S390_UCAS_UNMAP: {
4888 struct kvm_s390_ucas_mapping ucasmap;
4890 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4895 if (!kvm_is_ucontrol(vcpu->kvm)) {
4900 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4905 case KVM_S390_VCPU_FAULT: {
4906 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4909 case KVM_ENABLE_CAP:
4911 struct kvm_enable_cap cap;
4913 if (copy_from_user(&cap, argp, sizeof(cap)))
4915 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4918 case KVM_S390_MEM_OP: {
4919 struct kvm_s390_mem_op mem_op;
/* the memory operation is only dispatched when the descriptor was copied in completely */
4921 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4922 r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4927 case KVM_S390_SET_IRQ_STATE: {
4928 struct kvm_s390_irq_state irq_state;
4931 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
/*
 * len is checked for being zero, larger than VCPU_IRQS_MAX_BUF, or not a
 * multiple of sizeof(struct kvm_s390_irq)
 */
4933 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4934 irq_state.len == 0 ||
4935 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4939 /* do not use irq_state.flags, it will break old QEMUs */
4940 r = kvm_s390_set_irq_state(vcpu,
4941 (void __user *) irq_state.buf,
4945 case KVM_S390_GET_IRQ_STATE: {
4946 struct kvm_s390_irq_state irq_state;
4949 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
/* a zero-length buffer is special-cased before the state is fetched */
4951 if (irq_state.len == 0) {
4955 /* do not use irq_state.flags, it will break old QEMUs */
4956 r = kvm_s390_get_irq_state(vcpu,
4957 (__u8 __user *) irq_state.buf,
/*
 * Fault handler for a vcpu (struct vm_fault based).
 *
 * @vcpu: vcpu the fault belongs to
 * @vmf:  fault descriptor; vmf->pgoff selects the page, vmf->page receives it
 *
 * With CONFIG_KVM_S390_UCONTROL, a fault at page offset
 * KVM_S390_SIE_PAGE_OFFSET on a ucontrol VM is served with the page backing
 * vcpu->arch.sie_block, and a reference is taken on it with get_page().
 * The function's final statement returns VM_FAULT_SIGBUS.
 */
4969 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4971 #ifdef CONFIG_KVM_S390_UCONTROL
4972 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4973 && (kvm_is_ucontrol(vcpu->kvm))) {
4974 vmf->page = virt_to_page(vcpu->arch.sie_block);
4975 get_page(vmf->page);
4979 return VM_FAULT_SIGBUS;
4982 /* Section: memory related */
/*
 * Sanity-check a memory slot change before it is committed.
 *
 * @kvm:    VM the slot belongs to
 * @old:    previous slot state (not referenced by the checks shown here)
 * @new:    requested slot state
 * @change: kind of change (KVM_MR_*)
 *
 * Checks, in order: whether the VM has a protected-virtualization handle,
 * whether the change is a delete or a flags-only update, whether
 * new->userspace_addr and the slot size are 1 MB aligned (low 20 bits
 * clear), and whether the end of the slot exceeds kvm->arch.mem_limit.
 */
4983 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4984 const struct kvm_memory_slot *old,
4985 struct kvm_memory_slot *new,
4986 enum kvm_mr_change change)
4990 /* When we are protected, we should not change the memory slots */
4991 if (kvm_s390_pv_get_handle(kvm))
4994 if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
4997 /* A few sanity checks. We can have memory slots which have to be
4998 located/ended at a segment boundary (1MB). The memory in userland is
4999 ok to be fragmented into various different vmas. It is okay to mmap()
5000 and munmap() stuff in this slot after doing this call at any time */
/* user-space start address must have its low 20 bits clear */
5002 if (new->userspace_addr & 0xffffful)
/* slot size in bytes; it is tested against the same 20-bit mask */
5005 size = new->npages * PAGE_SIZE;
5006 if (size & 0xffffful)
/* guest-physical end of the slot is compared against the VM's memory limit */
5009 if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
/*
 * Apply a memory slot change to the guest address space (kvm->arch.gmap).
 *
 * @kvm:    VM the slot belongs to
 * @old:    previous slot; its guest range (base_gfn and npages scaled by
 *          PAGE_SIZE) is passed to gmap_unmap_segment()
 * @new:    new slot; its user-space address and guest range are passed to
 *          gmap_map_segment()
 * @change: kind of change (KVM_MR_*); the WARN() below reports an unknown
 *          value
 *
 * The gmap call results are collected in rc, and pr_warn() reports a failed
 * commit. The function returns void, so no error reaches the caller.
 */
5015 void kvm_arch_commit_memory_region(struct kvm *kvm,
5016 struct kvm_memory_slot *old,
5017 const struct kvm_memory_slot *new,
5018 enum kvm_mr_change change)
5024 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5025 old->npages * PAGE_SIZE);
5028 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5029 old->npages * PAGE_SIZE);
5034 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5035 new->base_gfn * PAGE_SIZE,
5036 new->npages * PAGE_SIZE);
5038 case KVM_MR_FLAGS_ONLY:
5041 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5044 pr_warn("failed to commit memory region\n");
/*
 * Compute the mask applied to entry @i of the facility list (see the
 * stfle_fac_list[i] & nonhyp_mask(i) use in kvm_s390_init()).
 *
 * A field is selected from sclp.hmfai by shifting it left by 2 * i and then
 * right by 30 (the top two bits, assuming hmfai is 32 bits wide - TODO
 * confirm against the sclp definition). The base mask 0x0000ffffffffffff is
 * then shifted right by 16 times that field value.
 */
5048 static inline unsigned long nonhyp_mask(int i)
5050 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5052 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
/*
 * Module init: check prerequisites, extend the base facility list and
 * register with the generic KVM core.
 *
 * Logs "SIE is not available" when sclp.has_sief2 is clear, and a
 * nesting/huge-page conflict message when both nested and hpage are set.
 * It then ORs stfle_fac_list[i] & nonhyp_mask(i) into kvm_s390_fac_base[i]
 * for the first 16 entries and returns the result of kvm_init().
 */
5055 static int __init kvm_s390_init(void)
5059 if (!sclp.has_sief2) {
5060 pr_info("SIE is not available\n");
5064 if (nested && hpage) {
5065 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
/* only facility bits that pass nonhyp_mask() are added to the base list */
5069 for (i = 0; i < 16; i++)
5070 kvm_s390_fac_base[i] |=
5071 stfle_fac_list[i] & nonhyp_mask(i);
5073 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
/* Module exit handler; registered with module_exit() below. */
5076 static void __exit kvm_s390_exit(void)
/* Register kvm_s390_init() and kvm_s390_exit() as the module entry and exit points. */
5081 module_init(kvm_s390_init);
5082 module_exit(kvm_s390_exit);
5085 * Enable autoloading of the kvm module.
5086 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5087 * since x86 takes a different approach.
5089 #include <linux/miscdevice.h>
5090 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5091 MODULE_ALIAS("devname:kvm");