// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

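/*
 * plo_test_bit() probes one PERFORM LOCKED OPERATION subfunction. Setting
 * bit 0x100 in GR0 selects the "test bit" form, so the parameter operands
 * are ignored; condition code 0 means subfunction @nr is available.
 */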
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

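/*
 * __insn32_query() runs the query variant of a 32-bit opcode: GR0 == 0
 * selects the query subfunction and GR1 holds the address of the buffer
 * that receives the subfunction bitmap.
 */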
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938 /* SORT LISTS */
#define INSN_DFLTCC 0xb939 /* DEFLATE CONVERSION CALL */

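/*
 * Probe which facilities and CPACF/PLO/SORTL/DFLTCC subfunctions the host
 * offers and record them in kvm_s390_available_subfunc and
 * kvm_s390_available_cpu_feat. These masks later bound what a cpu model
 * may enable for a guest.
 */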
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
	case KVM_CAP_S390_MEM_OP_EXTENSION:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

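/*
 * Transfer dirty information for one memslot from the gmap to the memslot's
 * dirty bitmap, working on one guest segment (_PAGE_ENTRIES pages) at a time.
 */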
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

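/*
 * Handle the KVM_S390_VM_CRYPTO attribute group: generate or clear the AES
 * and DEA wrapping key masks and flip the APIE setting under kvm->lock, then
 * run kvm_s390_vcpu_crypto_reset_all() so every vcpu picks up the new state.
 */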
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	unsigned long cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

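/*
 * The KVM_S390_VM_TOD attribute group transports the guest TOD clock.
 * KVM_S390_VM_TOD_EXT carries the epoch index together with the 64-bit TOD
 * base, while the HIGH/LOW attributes transport the two halves separately.
 */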
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

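/*
 * Compute the guest's view of the TOD clock: the host TOD plus the VM's
 * epoch, carrying into the epoch index when the 64-bit sum wraps (only
 * relevant with the multiple-epoch facility, stfle bit 139).
 */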
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

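/*
 * Read guest storage keys for a range of guest frames into a userspace
 * buffer. Returns KVM_S390_GET_SKEYS_NONE if the guest is not using
 * storage keys at all.
 */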
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

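/*
 * Peek at the CMMA page states without touching the migration state: copy
 * the pgste-based values for consecutive guest frames into the result
 * buffer, stopping at the first frame without a valid host mapping.
 */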
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}

static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
						     gfn_t gfn)
{
	return ____gfn_to_memslot(slots, gfn, true);
}

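/*
 * Starting at cur_gfn, find the next guest frame whose CMMA state is dirty
 * in the per-slot shadow bitmaps (the second half of the dirty bitmap),
 * walking the gfn tree and wrapping around above the highest slot.
 */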
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
	unsigned long ofs = cur_gfn - ms->base_gfn;
	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		mnode = rb_next(mnode);
		/* If we are above the highest slot, wrap around */
		if (!mnode)
			mnode = rb_first(&slots->gfn_tree);

		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
	}
	return ms->base_gfn + ofs;
}

static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	if (unlikely(kvm_memslots_empty(slots)))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = kvm_s390_get_gfn_end(slots);

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	mmap_read_lock(kvm->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(kvm->mm);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
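/*
 * A minimal userspace retrieval loop for reference (illustrative only;
 * vm_fd, the buffer size and the consume() helper are assumptions, the
 * ABI is struct kvm_s390_cmma_log from <linux/kvm.h>):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = 4096,
 *		.values = (__u64)buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		consume(buf, log.count, log.start_gfn);
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 */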
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	mmap_read_lock(kvm->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(kvm->mm);

	if (!kvm->mm->context.uses_cmm) {
		mmap_write_lock(kvm->mm);
		kvm->mm->context.uses_cmm = 1;
		mmap_write_unlock(kvm->mm);
	}
out:
	vfree(bits);
	return r;
}
static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;
	u16 rc, rrc;
	int ret = 0;

	/*
	 * We ignore failures and try to destroy as many CPUs as possible.
	 * At the same time we must not free the assigned resources when
	 * this fails, as the ultravisor has still access to that memory.
	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
	 * behind.
	 * We want to return the first failure rc and rrc, though.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
			*rcp = rc;
			*rrcp = rrc;
			ret = -EIO;
		}
		mutex_unlock(&vcpu->mutex);
	}
	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
	if (use_gisa)
		kvm_s390_gisa_enable(kvm);
	return ret;
}
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	unsigned long i;
	int r = 0;
	u16 dummy;

	struct kvm_vcpu *vcpu;

	/* Disable the GISA if the ultravisor does not support AIV. */
	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
		kvm_s390_gisa_disable(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
		mutex_unlock(&vcpu->mutex);
		if (r)
			break;
	}
	if (r)
		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	return r;
}
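/*
 * Design note: the conversion to protected mode is all-or-nothing. If
 * any vcpu fails kvm_s390_pv_create_cpu(), the vcpus that already made
 * it are rolled back via kvm_s390_cpus_from_pv(); its rc/rrc results go
 * to a dummy so the original creation error is what the caller reports.
 */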
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
	int r = 0;
	u16 dummy;
	void __user *argp = (void __user *)cmd->data;

	switch (cmd->cmd) {
	case KVM_PV_ENABLE: {
		r = -EINVAL;
		if (kvm_s390_pv_is_protected(kvm))
			break;

		/*
		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
		 *  esca, we need no cleanup in the error cases below
		 */
		r = sca_switch_to_extended(kvm);
		if (r)
			break;

		mmap_write_lock(current->mm);
		r = gmap_mark_unmergeable();
		mmap_write_unlock(current->mm);
		if (r)
			break;

		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			break;

		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);

		/* we need to block service interrupts from now on */
		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_DISABLE: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
		/*
		 * If a CPU could not be destroyed, destroy VM will also fail.
		 * There is no point in trying to destroy it. Instead return
		 * the rc and rrc from the first CPU that failed destroying.
		 */
		if (r)
			break;
		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);

		/* no need to block service interrupts any more */
		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_SET_SEC_PARMS: {
		struct kvm_s390_pv_sec_parm parms = {};
		void *hdr;

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&parms, argp, sizeof(parms)))
			break;

		/* Currently restricted to 8KB */
		r = -EINVAL;
		if (parms.length > PAGE_SIZE * 2)
			break;

		r = -ENOMEM;
		hdr = vmalloc(parms.length);
		if (!hdr)
			break;

		r = -EFAULT;
		if (!copy_from_user(hdr, (void __user *)parms.origin,
				    parms.length))
			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
						      &cmd->rc, &cmd->rrc);

		vfree(hdr);
		break;
	}
	case KVM_PV_UNPACK: {
		struct kvm_s390_pv_unp unp = {};

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
			break;

		r = -EFAULT;
		if (copy_from_user(&unp, argp, sizeof(unp)))
			break;

		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
				       &cmd->rc, &cmd->rrc);
		break;
	}
	case KVM_PV_VERIFY: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
			     cmd->rrc);
		break;
	}
	case KVM_PV_PREP_RESET: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	case KVM_PV_UNSHARE_ALL: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
static bool access_key_invalid(u8 access_key)
{
	return access_key > 0xf;
}
static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	u64 supported_flags;
	void *tmpbuf = NULL;
	int r, srcu_idx;

	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
			  | KVM_S390_MEMOP_F_CHECK_ONLY;
	if (mop->flags & ~supported_flags || !mop->size)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (kvm_s390_pv_is_protected(kvm))
		return -EINVAL;
	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
		if (access_key_invalid(mop->key))
			return -EINVAL;
	} else {
		mop->key = 0;
	}
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);

	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
		r = PGM_ADDRESSING;
		goto out_unlock;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_ABSOLUTE_READ: {
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
		} else {
			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
						      mop->size, GACC_FETCH, mop->key);
			if (r == 0) {
				if (copy_to_user(uaddr, tmpbuf, mop->size))
					r = -EFAULT;
			}
		}
		break;
	}
	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
		} else {
			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
				r = -EFAULT;
				break;
			}
			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
						      mop->size, GACC_STORE, mop->key);
		}
		break;
	}
	default:
		r = -EINVAL;
	}

out_unlock:
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	vfree(tmpbuf);
	return r;
}
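/*
 * A minimal userspace sketch of an absolute read for reference
 * (illustrative only; vm_fd, the address and 'buf' are assumptions,
 * the ABI is struct kvm_s390_mem_op from <linux/kvm.h>):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf   = (__u64)buf,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 *
 * A positive return value is a program interruption code (e.g.
 * PGM_ADDRESSING), a negative one is a host errno.
 */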
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	case KVM_S390_PV_COMMAND: {
		struct kvm_pv_cmd args;

		/* protvirt means user cpu state */
		kvm_s390_set_user_cpu_state_ctrl(kvm);
		r = 0;
		if (!is_prot_virt_host()) {
			r = -EINVAL;
			break;
		}
		if (copy_from_user(&args, argp, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		if (args.flags) {
			r = -EINVAL;
			break;
		}
		mutex_lock(&kvm->lock);
		r = kvm_s390_handle_pv(kvm, &args);
		mutex_unlock(&kvm->lock);
		if (copy_to_user(argp, &args, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vm_mem_op(kvm, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_apxa_installed(void)
{
	struct ap_config_info info;

	if (ap_instructions_available()) {
		if (ap_qci(&info) == 0)
			return info.apxa;
	}

	return 0;
}
/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility are installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
/**
 * kvm_arch_crypto_set_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 *	 to be set.
 * @apm: the mask identifying the accessible AP adapters
 * @aqm: the mask identifying the accessible AP domains
 * @adm: the mask identifying the accessible AP control domains
 *
 * Set the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 *	 function.
 */
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
			       unsigned long *aqm, unsigned long *adm)
{
	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;

	kvm_s390_vcpu_block_all(kvm);

	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
		memcpy(crycb->apcb1.apm, apm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
			 apm[0], apm[1], apm[2], apm[3]);
		memcpy(crycb->apcb1.aqm, aqm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
			 aqm[0], aqm[1], aqm[2], aqm[3]);
		memcpy(crycb->apcb1.adm, adm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
			 adm[0], adm[1], adm[2], adm[3]);
		break;
	case CRYCB_FORMAT1:
	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
		memcpy(crycb->apcb0.apm, apm, 8);
		memcpy(crycb->apcb0.aqm, aqm, 2);
		memcpy(crycb->apcb0.adm, adm, 2);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
			 apm[0], *((unsigned short *)aqm),
			 *((unsigned short *)adm));
		break;
	default:	/* Can not happen */
		break;
	}

	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
/**
 * kvm_arch_crypto_clear_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 *	 to be cleared.
 *
 * Clear the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 *	 function.
 */
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
	kvm_s390_vcpu_block_all(kvm);

	memset(&kvm->arch.crypto.crycb->apcb0, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb0));
	memset(&kvm->arch.crypto.crycb->apcb1, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb1));

	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);
	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);

	if (!test_kvm_facility(kvm, 76))
		return;

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
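/*
 * Note: the AES/DEA wrapping key masks are freshly randomized for every
 * VM, so keys wrapped inside one guest are useless in any other guest
 * or on the host; userspace can still clear or regenerate them via the
 * KVM_S390_VM_CRYPTO attributes.
 */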
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	mutex_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	mutex_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.sie_page2->kvm = kvm;
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}
	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	if (css_general_characteristics.aiv && test_facility(65))
		set_kvm_facility(kvm->arch.model.fac_mask, 65);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	if (use_gisa)
		kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	u16 rc, rrc;

	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	/* We can not hold the vcpu mutex here, we are already dying */
	if (kvm_s390_pv_cpu_get_handle(vcpu))
		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
	free_page((unsigned long)(vcpu->arch.sie_block));
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	u16 rc, rrc;

	kvm_destroy_vcpus(kvm);
	sca_dispose(kvm);
	kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned long vcpu_idx;
	u32 scaol, scaoh;

	if (kvm->arch.use_esca)
		return 0;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
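/*
 * Capacity note: a basic SCA addresses KVM_S390_BSCA_CPU_SLOTS (64)
 * vcpus, an extended SCA KVM_S390_ESCA_CPU_SLOTS (248). The switch is
 * deliberately one-way, and all vcpus are blocked while their
 * scaoh/scaol pointers are rewritten, so none can run with a stale SCA.
 */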
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
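/*
 * Note on the loop above: raw_read_seqcount() is used instead of the
 * spinning read_seqcount_begin(), so the reader may also get hold of an
 * odd (write-in-progress) sequence value; masking the low bit in
 * read_seqcount_retry() then forces a retry until a stable, even count
 * is observed. The WARN_ON_ONCE documents the one case that cannot
 * work: a read on the vcpu's own CPU while that CPU is inside the
 * write side.
 */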
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
{
	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
		return true;
	return false;
}

static bool kvm_has_pckmo_ecc(struct kvm *kvm)
{
	/* At least one ECC subfunction must be present */
	return kvm_has_pckmo_subfunc(kvm, 32) ||
	       kvm_has_pckmo_subfunc(kvm, 33) ||
	       kvm_has_pckmo_subfunc(kvm, 34) ||
	       kvm_has_pckmo_subfunc(kvm, 40) ||
	       kvm_has_pckmo_subfunc(kvm, 41);

}
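/*
 * The function codes checked above are, per the MSA9 definition of
 * PCKMO, the ECC encrypted-key functions: 32-34 cover the ECC-P256/
 * P384/P521 keys and 40/41 the Ed25519/Ed448 keys. A single present
 * code is enough, since they all use the same AES wrapping key.
 */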
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	/*
	 * If the AP instructions are not being interpreted and the MSAX3
	 * facility is not configured for the guest, there is nothing to set up.
	 */
	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
	vcpu->arch.sie_block->eca &= ~ECA_APIE;
	vcpu->arch.sie_block->ecd &= ~ECD_ECC;

	if (vcpu->kvm->arch.crypto.apie)
		vcpu->arch.sie_block->eca |= ECA_APIE;

	/* Set up protected key support */
	if (vcpu->kvm->arch.crypto.aes_kw) {
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
		/* ecc is also wrapped with AES key */
		if (kvm_has_pckmo_ecc(vcpu->kvm))
			vcpu->arch.sie_block->ecd |= ECD_ECC;
	}

	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;
	u16 uvrc, uvrrc;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.sie_block->ecb |= ECB_SPECI;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	mutex_lock(&vcpu->kvm->lock);
	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
		if (rc)
			kvm_s390_vcpu_unsetup_cmma(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);

	return rc;
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		return -EINVAL;
	return 0;
}
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct sie_page *sie_page;
	int rc;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!sie_page)
		return -ENOMEM;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
	seqcount_init(&vcpu->arch.cputm_seqcount);

	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT |
				    KVM_SYNC_DIAG318;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm)) {
		rc = __kvm_ucontrol_vcpu_init(vcpu);
		if (rc)
			goto out_free_sie_block;
	}

	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);

	rc = kvm_s390_vcpu_setup(vcpu);
	if (rc)
		goto out_ucontrol_uninit;
	return 0;

out_ucontrol_uninit:
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
	return rc;
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->prog20) &
	       (PROG_BLOCK_SIE | PROG_REQUEST);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	__kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	unsigned long i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
		}
	}
}
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
	/* do not poll with more than halt_poll_max_steal percent of steal time */
	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
	    READ_ONCE(halt_poll_max_steal)) {
		vcpu->stat.halt_no_poll_steal++;
		return true;
	}
	return false;
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));

	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	/* Initial reset is a superset of the normal reset */
	kvm_arch_vcpu_ioctl_normal_reset(vcpu);

	/*
	 * This equals initial cpu reset in pop, but we don't switch to ESA.
	 * We do not only reset the internal data, but also ...
	 */
	vcpu->arch.sie_block->gpsw.mask = 0;
	vcpu->arch.sie_block->gpsw.addr = 0;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0;
	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;

	/* ... the data in sync regs */
	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
	vcpu->run->s.regs.ckc = 0;
	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
	vcpu->run->psw_addr = 0;
	vcpu->run->psw_mask = 0;
	vcpu->run->s.regs.todpr = 0;
	vcpu->run->s.regs.cputm = 0;
	vcpu->run->s.regs.ckc = 0;
	vcpu->run->s.regs.pp = 0;
	vcpu->run->s.regs.gbea = 1;
	vcpu->run->s.regs.fpc = 0;
	/*
	 * Do not reset these registers in the protected case, as some of
	 * them are overlayed and they are not accessible in this case
	 * anyway.
	 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		rc = kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		rc = kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
			rc = -ENXIO;
			break;
		}
		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
		break;
	case KVM_MP_STATE_CHECK_STOP:
		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * If the guest prefix changed, re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	union tod_clock clk;
	unsigned long i;

	preempt_disable();

	store_tod_clock_ext(&clk);

	kvm->arch.epoch = gtod->tod - clk.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
}
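/*
 * Note on the epoch index: the epoch is computed modulo 2^64, so when
 * the requested guest TOD lies before the host TOD the subtraction
 * wraps (kvm->arch.epoch > gtod->tod) and the epoch extension has to
 * borrow, hence the epdx -= 1 above.
 */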
void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	mutex_lock(&kvm->lock);
	__kvm_s390_set_tod_clock(kvm, gtod);
	mutex_unlock(&kvm->lock);
}

int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	if (!mutex_trylock(&kvm->lock))
		return 0;
	__kvm_s390_set_tod_clock(kvm, gtod);
	mutex_unlock(&kvm->lock);
	return 1;
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);

	return true;
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return false;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return false;
	if (psw_extint_disabled(vcpu))
		return false;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return false;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return false;
	if (!vcpu->arch.gmap->pfault_enabled)
		return false;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return false;

	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
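/*
 * The checks above mirror the pfault handshake: userspace must have
 * installed a valid token, the guest PSW must match the
 * pfault_select/compare mask, external interrupts and the service
 * signal submask must be open, and no other interrupt may be pending;
 * otherwise the fault is handled synchronously.
 */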
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		vcpu->stat.pfault_sync++;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
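/*
 * Note on the interrupt bracketing above: sie64a() itself runs with
 * interrupts enabled; the local_irq_disable()/enable() pairs only make
 * the context-tracking transitions (guest_enter/exit_irqoff) and the
 * cpu timer accounting flips atomic with respect to interrupts.
 */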
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
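/*
 * Transfer all registers that userspace marked dirty in kvm_run into our
 * internal copies before entering SIE. For protected guests, only the
 * condition-code bits of the PSW mask are accepted; see the comment below.
 */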
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}
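/*
 * Counterpart to sync_regs_fmt2(): copy format-2 only state back into
 * kvm_run and hand guarded storage back to the host.
 */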
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}
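/*
 * Handler for the KVM_RUN ioctl: syncs dirty registers in, runs the SIE
 * loop via __vcpu_run() and stores the register state back into kvm_run
 * before returning to userspace.
 */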
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;
	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);
	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}
	kvm_sigset_activate(vcpu);
	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}
	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);
	might_fault();
	rc = __vcpu_run(vcpu);
	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}
	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}
	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}
	store_regs(vcpu);
	disable_cpu_timer_accounting(vcpu);
	kvm_sigset_deactivate(vcpu);
out:
	vcpu->stat.exit_userspace++;
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
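/*
 * IBS (used to speed up a VCPU that is the only one running) is toggled
 * through vCPU requests: any opposite pending request is cancelled first
 * via kvm_check_request(), then the new one is made visible to the
 * target vCPU.
 */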
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/*
	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
	 * have been fully processed. This will ensure that the VCPU
	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_clear_stop_irq(vcpu);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);

		if (!is_vcpu_stopped(tmp)) {
			started_vcpus++;
			started_vcpu = tmp;
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
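/*
 * Per-vCPU KVM_ENABLE_CAP backend; KVM_CAP_S390_CSS_SUPPORT is currently
 * the only capability handled at vCPU scope.
 */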
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	/* size + offset must not wrap around */
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;
	if (!kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
				 mop->sida_offset), mop->size))
			r = -EFAULT;
		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
				   mop->sida_offset), uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}
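/*
 * Backend for KVM_S390_MEM_OP on logical guest addresses: either just
 * checks accessibility (KVM_S390_MEMOP_F_CHECK_ONLY) or bounces the data
 * through a temporary kernel buffer, honoring storage keys if requested.
 */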
static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
				 struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r = 0;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY
				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;
	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
		if (access_key_invalid(mop->key))
			return -EINVAL;
	} else {
		mop->key = 0;
	}
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
					    GACC_FETCH, mop->key);
			break;
		}
		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					mop->size, mop->key);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
					    GACC_STORE, mop->key);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					 mop->size, mop->key);
		break;
	}

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
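/*
 * Dispatches a mem op to the logical-address or SIDA handler; kvm->srcu
 * is held across the operation so memslots cannot change underneath us.
 */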
static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
				     struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_vcpu_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_vcpu_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}
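/*
 * These interrupt-injection ioctls are handled on the async path, i.e.
 * (as far as the generic ioctl code is concerned) without taking the
 * vcpu mutex first; everything else is punted to kvm_arch_vcpu_ioctl()
 * by returning -ENOIOCTLCMD.
 */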
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
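/*
 * Main vCPU ioctl multiplexer; runs with the vCPU loaded (the switch is
 * bracketed by vcpu_load()/vcpu_put()).
 */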
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
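/*
 * For user-controlled (ucontrol) VMs, userspace may mmap() the vCPU fd at
 * KVM_S390_SIE_PAGE_OFFSET to access the SIE control block; any other
 * access faults with SIGBUS.
 */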
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
	    && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   const struct kvm_memory_slot *old,
				   struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	gpa_t size;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;

	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
		return 0;

	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (new->userspace_addr & 0xffffful)
		return -EINVAL;

	size = new->npages * PAGE_SIZE;
	if (size & 0xffffful)
		return -EINVAL;

	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
				      new->base_gfn * PAGE_SIZE,
				      new->npages * PAGE_SIZE);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
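/*
 * sclp.hmfai appears to encode, two bits per facility-list doubleword, how
 * much of that doubleword remains usable outside the hypervisor. Worked
 * example for the arithmetic below: for block i, (sclp.hmfai << i * 2) >> 30
 * extracts the i-th 2-bit field; a value of 2 shifts the 48-bit base mask
 * right by 2 << 4 = 32 bits, so only 0x000000000000ffffUL of that facility
 * doubleword is passed through to kvm_s390_fac_base in kvm_s390_init().
 */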
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}
	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");