// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
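
/*
 * Exported via debugfs; VCPU_STAT entries aggregate per-vcpu event counters,
 * while VM_STAT entries count per-vm events such as floating interrupt
 * injection.
 */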
struct kvm_stats_debugfs_item debugfs_entries[] = {
	VCPU_STAT("userspace_handled", exit_userspace),
	VCPU_STAT("exit_null", exit_null),
	VCPU_STAT("pfault_sync", pfault_sync),
	VCPU_STAT("exit_validity", exit_validity),
	VCPU_STAT("exit_stop_request", exit_stop_request),
	VCPU_STAT("exit_external_request", exit_external_request),
	VCPU_STAT("exit_io_request", exit_io_request),
	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
	VCPU_STAT("exit_instruction", exit_instruction),
	VCPU_STAT("exit_pei", exit_pei),
	VCPU_STAT("exit_program_interruption", exit_program_interruption),
	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
	VCPU_STAT("exit_operation_exception", exit_operation_exception),
	VCPU_STAT("halt_successful_poll", halt_successful_poll),
	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
	VCPU_STAT("halt_wakeup", halt_wakeup),
	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
	VCPU_STAT("instruction_lctlg", instruction_lctlg),
	VCPU_STAT("instruction_lctl", instruction_lctl),
	VCPU_STAT("instruction_stctl", instruction_stctl),
	VCPU_STAT("instruction_stctg", instruction_stctg),
	VCPU_STAT("deliver_ckc", deliver_ckc),
	VCPU_STAT("deliver_cputm", deliver_cputm),
	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
	VCPU_STAT("deliver_external_call", deliver_external_call),
	VCPU_STAT("deliver_service_signal", deliver_service_signal),
	VCPU_STAT("deliver_virtio", deliver_virtio),
	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
	VCPU_STAT("deliver_program", deliver_program),
	VCPU_STAT("deliver_io", deliver_io),
	VCPU_STAT("deliver_machine_check", deliver_machine_check),
	VCPU_STAT("exit_wait_state", exit_wait_state),
	VCPU_STAT("inject_ckc", inject_ckc),
	VCPU_STAT("inject_cputm", inject_cputm),
	VCPU_STAT("inject_external_call", inject_external_call),
	VM_STAT("inject_float_mchk", inject_float_mchk),
	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
	VM_STAT("inject_io", inject_io),
	VCPU_STAT("inject_mchk", inject_mchk),
	VM_STAT("inject_pfault_done", inject_pfault_done),
	VCPU_STAT("inject_program", inject_program),
	VCPU_STAT("inject_restart", inject_restart),
	VM_STAT("inject_service_signal", inject_service_signal),
	VCPU_STAT("inject_set_prefix", inject_set_prefix),
	VCPU_STAT("inject_stop_signal", inject_stop_signal),
	VCPU_STAT("inject_pfault_init", inject_pfault_init),
	VM_STAT("inject_virtio", inject_virtio),
	VCPU_STAT("instruction_epsw", instruction_epsw),
	VCPU_STAT("instruction_gs", instruction_gs),
	VCPU_STAT("instruction_io_other", instruction_io_other),
	VCPU_STAT("instruction_lpsw", instruction_lpsw),
	VCPU_STAT("instruction_lpswe", instruction_lpswe),
	VCPU_STAT("instruction_pfmf", instruction_pfmf),
	VCPU_STAT("instruction_ptff", instruction_ptff),
	VCPU_STAT("instruction_stidp", instruction_stidp),
	VCPU_STAT("instruction_sck", instruction_sck),
	VCPU_STAT("instruction_sckpf", instruction_sckpf),
	VCPU_STAT("instruction_spx", instruction_spx),
	VCPU_STAT("instruction_stpx", instruction_stpx),
	VCPU_STAT("instruction_stap", instruction_stap),
	VCPU_STAT("instruction_iske", instruction_iske),
	VCPU_STAT("instruction_ri", instruction_ri),
	VCPU_STAT("instruction_rrbe", instruction_rrbe),
	VCPU_STAT("instruction_sske", instruction_sske),
	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
	VCPU_STAT("instruction_essa", instruction_essa),
	VCPU_STAT("instruction_stsi", instruction_stsi),
	VCPU_STAT("instruction_stfl", instruction_stfl),
	VCPU_STAT("instruction_tb", instruction_tb),
	VCPU_STAT("instruction_tpi", instruction_tpi),
	VCPU_STAT("instruction_tprot", instruction_tprot),
	VCPU_STAT("instruction_tsch", instruction_tsch),
	VCPU_STAT("instruction_sthyi", instruction_sthyi),
	VCPU_STAT("instruction_sie", instruction_sie),
	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
	VCPU_STAT("instruction_diag_10", diagnose_10),
	VCPU_STAT("instruction_diag_44", diagnose_44),
	VCPU_STAT("instruction_diag_9c", diagnose_9c),
	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
	VCPU_STAT("instruction_diag_258", diagnose_258),
	VCPU_STAT("instruction_diag_308", diagnose_308),
	VCPU_STAT("instruction_diag_500", diagnose_500),
	VCPU_STAT("instruction_diag_other", diagnose_other),
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa  = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
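
/*
 * A VM's effective facility mask is derived from these tables at VM creation
 * time: roughly fac_mask[i] = stfle_fac_list[i] &
 * (kvm_s390_fac_base[i] | kvm_s390_fac_ext[i]), i.e. only facilities the
 * host itself reports can ever be offered to a guest.
 */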
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	register unsigned long r0 asm("0") = 0;	/* query function */
	register unsigned long r1 asm("1") = (unsigned long) query;

	asm volatile(
		/* Parameter regs are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: "d" (r0), "a" (r1), [opc] "i" (opcode)
		: "cc", "memory");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
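
/*
 * Opcodes of the SORTL and DFLTCC instructions, passed to __insn32_query()
 * in kvm_s390_cpu_feat_init() below to retrieve their query result blocks.
 */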
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
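
/*
 * Returns 0 when an extension is not supported. For most capabilities a
 * nonzero value simply means "available"; for limit-style queries such as
 * KVM_CAP_S390_MEM_OP or KVM_CAP_MAX_VCPUS the returned value is the limit
 * itself.
 */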
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}
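
/*
 * Transfer the dirty bits collected in the gmap segment tables into the
 * memslot's dirty bitmap, processing one page-table block (_PAGE_ENTRIES
 * pages) per iteration.
 */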
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
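
/*
 * Force an operation-exception intercept on all VCPUs, e.g. after userspace
 * asked to handle the 0x0000 instruction itself (KVM_CAP_S390_USER_INSTR0).
 */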
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
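
/*
 * A minimal userspace sketch for the memory-limit attribute handled above
 * (vm_fd and error handling are assumed):
 *
 *	__u64 limit = 1ULL << 31;	// example: 2 GiB
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */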
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
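
/*
 * KVM_S390_VM_TOD_EXT carries the 8-bit epoch index together with the 64-bit
 * TOD base (multiple-epoch facility, stfle bit 139); TOD_HIGH/TOD_LOW form
 * the older split interface, where the high part must currently be zero.
 */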
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);
out:
	kvfree(keys);
	return r;
}
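
/*
 * The matching vm ioctl is KVM_S390_SET_SKEYS: struct kvm_s390_skeys names
 * start_gfn/count plus a userspace buffer holding one storage-key byte per
 * page; the lowest bit of each key is reserved and must be zero (see above).
 */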
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (start >= slots->used_slots)
		return slots->used_slots - 1;

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
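
/*
 * Both the lru_slot shortcut and the binary search above rely on the
 * memslots array being kept sorted by descending base_gfn.
 */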
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
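
/*
 * Only the CMMA state bits defined for the migration interface survive the
 * 0x43 mask; see the KVM_S390_GET_CMMA_BITS documentation in
 * Documentation/virt/kvm/api.rst for the exact layout of the result bytes.
 */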
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	if (unlikely(!slots->used_slots))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
2056 * This function searches for the next page with dirty CMMA attributes, and
2057 * saves the attributes in the buffer up to either the end of the buffer or
2058 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2059 * no trailing clean bytes are saved.
2060 * In case no dirty bits were found, or if CMMA was not enabled or used, the
2061 * output buffer will indicate 0 as length.
2063 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2064 struct kvm_s390_cmma_log *args)
2066 unsigned long bufsize;
2067 int srcu_idx, peek, ret;
2070 if (!kvm->arch.use_cmma)
2072 /* Invalid/unsupported flags were specified */
2073 if (args->flags & ~KVM_S390_CMMA_PEEK)
2075 /* Migration mode query, and we are not doing a migration */
2076 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2077 if (!peek && !kvm->arch.migration_mode)
2079 /* CMMA is disabled or was not used, or the buffer has length zero */
2080 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2081 if (!bufsize || !kvm->mm->context.uses_cmm) {
2082 memset(args, 0, sizeof(*args));
2085 /* We are not peeking, and there are no dirty pages */
2086 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2087 memset(args, 0, sizeof(*args));
2091 values = vmalloc(bufsize);
2095 mmap_read_lock(kvm->mm);
2096 srcu_idx = srcu_read_lock(&kvm->srcu);
2098 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2100 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2101 srcu_read_unlock(&kvm->srcu, srcu_idx);
2102 mmap_read_unlock(kvm->mm);
2104 if (kvm->arch.migration_mode)
2105 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2107 args->remaining = 0;
2109 if (copy_to_user((void __user *)args->values, values, args->count))
2117 * This function sets the CMMA attributes for the given pages. If the input
2118 * buffer has zero length, no action is taken, otherwise the attributes are
2119 * set and the mm->context.uses_cmm flag is set.
2121 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2122 const struct kvm_s390_cmma_log *args)
2124 unsigned long hva, mask, pgstev, i;
2126 int srcu_idx, r = 0;
2130 if (!kvm->arch.use_cmma)
2132 /* invalid/unsupported flags */
2133 if (args->flags != 0)
2135 /* Enforce sane limit on memory allocation */
2136 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2139 if (args->count == 0)
2142 bits = vmalloc(array_size(sizeof(*bits), args->count));
2146 r = copy_from_user(bits, (void __user *)args->values, args->count);
2152 mmap_read_lock(kvm->mm);
2153 srcu_idx = srcu_read_lock(&kvm->srcu);
2154 for (i = 0; i < args->count; i++) {
2155 hva = gfn_to_hva(kvm, args->start_gfn + i);
2156 if (kvm_is_error_hva(hva)) {
2162 pgstev = pgstev << 24;
2163 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2164 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2166 srcu_read_unlock(&kvm->srcu, srcu_idx);
2167 mmap_read_unlock(kvm->mm);
2169 if (!kvm->mm->context.uses_cmm) {
2170 mmap_write_lock(kvm->mm);
2171 kvm->mm->context.uses_cmm = 1;
2172 mmap_write_unlock(kvm->mm);
2179 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2181 struct kvm_vcpu *vcpu;
2187 * We ignore failures and try to destroy as many CPUs as possible.
2188 * At the same time we must not free the assigned resources when
2189 * this fails, as the ultravisor has still access to that memory.
2190 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2192 * We want to return the first failure rc and rrc, though.
2194 kvm_for_each_vcpu(i, vcpu, kvm) {
2195 mutex_lock(&vcpu->mutex);
2196 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2201 mutex_unlock(&vcpu->mutex);
2206 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2211 struct kvm_vcpu *vcpu;
2213 kvm_for_each_vcpu(i, vcpu, kvm) {
2214 mutex_lock(&vcpu->mutex);
2215 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2216 mutex_unlock(&vcpu->mutex);
2221 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2225 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2229 void __user *argp = (void __user *)cmd->data;
2232 case KVM_PV_ENABLE: {
2234 if (kvm_s390_pv_is_protected(kvm))
2238 * FMT 4 SIE needs esca. As we never switch back to bsca from
2239 * esca, we need no cleanup in the error cases below
2241 r = sca_switch_to_extended(kvm);
2245 mmap_write_lock(current->mm);
2246 r = gmap_mark_unmergeable();
2247 mmap_write_unlock(current->mm);
2251 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2255 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2257 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2259 /* we need to block service interrupts from now on */
2260 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2263 case KVM_PV_DISABLE: {
2265 if (!kvm_s390_pv_is_protected(kvm))
2268 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2270 * If a CPU could not be destroyed, destroy VM will also fail.
2271 * There is no point in trying to destroy it. Instead return
2272 * the rc and rrc from the first CPU that failed destroying.
2276 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2278 /* no need to block service interrupts any more */
2279 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2282 case KVM_PV_SET_SEC_PARMS: {
2283 struct kvm_s390_pv_sec_parm parms = {};
2287 if (!kvm_s390_pv_is_protected(kvm))
2291 if (copy_from_user(&parms, argp, sizeof(parms)))
2294 /* Currently restricted to 8KB */
2296 if (parms.length > PAGE_SIZE * 2)
2300 hdr = vmalloc(parms.length);
2305 if (!copy_from_user(hdr, (void __user *)parms.origin,
2307 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2308 &cmd->rc, &cmd->rrc);
2313 case KVM_PV_UNPACK: {
2314 struct kvm_s390_pv_unp unp = {};
2317 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2321 if (copy_from_user(&unp, argp, sizeof(unp)))
2324 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2325 &cmd->rc, &cmd->rrc);
2328 case KVM_PV_VERIFY: {
2330 if (!kvm_s390_pv_is_protected(kvm))
2333 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2334 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2335 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2339 case KVM_PV_PREP_RESET: {
2341 if (!kvm_s390_pv_is_protected(kvm))
2344 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2345 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2346 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2350 case KVM_PV_UNSHARE_ALL: {
2352 if (!kvm_s390_pv_is_protected(kvm))
2355 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2356 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2357 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2367 long kvm_arch_vm_ioctl(struct file *filp,
2368 unsigned int ioctl, unsigned long arg)
2370 struct kvm *kvm = filp->private_data;
2371 void __user *argp = (void __user *)arg;
2372 struct kvm_device_attr attr;
2376 case KVM_S390_INTERRUPT: {
2377 struct kvm_s390_interrupt s390int;
2380 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2382 r = kvm_s390_inject_vm(kvm, &s390int);
2385 case KVM_CREATE_IRQCHIP: {
2386 struct kvm_irq_routing_entry routing;
2389 if (kvm->arch.use_irqchip) {
2390 /* Set up dummy routing. */
2391 memset(&routing, 0, sizeof(routing));
2392 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2396 case KVM_SET_DEVICE_ATTR: {
2398 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2400 r = kvm_s390_vm_set_attr(kvm, &attr);
2403 case KVM_GET_DEVICE_ATTR: {
2405 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2407 r = kvm_s390_vm_get_attr(kvm, &attr);
2410 case KVM_HAS_DEVICE_ATTR: {
2412 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2414 r = kvm_s390_vm_has_attr(kvm, &attr);
2417 case KVM_S390_GET_SKEYS: {
2418 struct kvm_s390_skeys args;
2421 if (copy_from_user(&args, argp,
2422 sizeof(struct kvm_s390_skeys)))
2424 r = kvm_s390_get_skeys(kvm, &args);
2427 case KVM_S390_SET_SKEYS: {
2428 struct kvm_s390_skeys args;
2431 if (copy_from_user(&args, argp,
2432 sizeof(struct kvm_s390_skeys)))
2434 r = kvm_s390_set_skeys(kvm, &args);
2437 case KVM_S390_GET_CMMA_BITS: {
2438 struct kvm_s390_cmma_log args;
2441 if (copy_from_user(&args, argp, sizeof(args)))
2443 mutex_lock(&kvm->slots_lock);
2444 r = kvm_s390_get_cmma_bits(kvm, &args);
2445 mutex_unlock(&kvm->slots_lock);
2447 r = copy_to_user(argp, &args, sizeof(args));
2453 case KVM_S390_SET_CMMA_BITS: {
2454 struct kvm_s390_cmma_log args;
2457 if (copy_from_user(&args, argp, sizeof(args)))
2459 mutex_lock(&kvm->slots_lock);
2460 r = kvm_s390_set_cmma_bits(kvm, &args);
2461 mutex_unlock(&kvm->slots_lock);
2464 case KVM_S390_PV_COMMAND: {
2465 struct kvm_pv_cmd args;
2467 /* protvirt means user sigp */
2468 kvm->arch.user_cpu_state_ctrl = 1;
2470 if (!is_prot_virt_host()) {
2474 if (copy_from_user(&args, argp, sizeof(args))) {
2482 mutex_lock(&kvm->lock);
2483 r = kvm_s390_handle_pv(kvm, &args);
2484 mutex_unlock(&kvm->lock);
2485 if (copy_to_user(argp, &args, sizeof(args))) {
2498 static int kvm_s390_apxa_installed(void)
2500 struct ap_config_info info;
2502 if (ap_instructions_available()) {
2503 if (ap_qci(&info) == 0)
2511 * The format of the crypto control block (CRYCB) is specified in the 3 low
2512 * order bits of the CRYCB designation (CRYCBD) field as follows:
2513 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2514 * AP extended addressing (APXA) facility are installed.
2515 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2516 * Format 2: Both the APXA and MSAX3 facilities are installed
2518 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2520 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2522 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2523 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2525 /* Check whether MSAX3 is installed */
2526 if (!test_kvm_facility(kvm, 76))
2529 if (kvm_s390_apxa_installed())
2530 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2532 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2535 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2536 unsigned long *aqm, unsigned long *adm)
2538 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2540 mutex_lock(&kvm->lock);
2541 kvm_s390_vcpu_block_all(kvm);
2543 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2544 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2545 memcpy(crycb->apcb1.apm, apm, 32);
2546 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2547 apm[0], apm[1], apm[2], apm[3]);
2548 memcpy(crycb->apcb1.aqm, aqm, 32);
2549 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2550 aqm[0], aqm[1], aqm[2], aqm[3]);
2551 memcpy(crycb->apcb1.adm, adm, 32);
2552 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2553 adm[0], adm[1], adm[2], adm[3]);
2556 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2557 memcpy(crycb->apcb0.apm, apm, 8);
2558 memcpy(crycb->apcb0.aqm, aqm, 2);
2559 memcpy(crycb->apcb0.adm, adm, 2);
2560 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2561 apm[0], *((unsigned short *)aqm),
2562 *((unsigned short *)adm));
2564 default: /* Can not happen */
2568 /* recreate the shadow crycb for each vcpu */
2569 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2570 kvm_s390_vcpu_unblock_all(kvm);
2571 mutex_unlock(&kvm->lock);
2573 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2575 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2577 mutex_lock(&kvm->lock);
2578 kvm_s390_vcpu_block_all(kvm);
2580 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2581 sizeof(kvm->arch.crypto.crycb->apcb0));
2582 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2583 sizeof(kvm->arch.crypto.crycb->apcb1));
2585 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2586 /* recreate the shadow crycb for each vcpu */
2587 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2588 kvm_s390_vcpu_unblock_all(kvm);
2589 mutex_unlock(&kvm->lock);
2591 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2593 static u64 kvm_s390_get_initial_cpuid(void)
2598 cpuid.version = 0xff;
2599 return *((u64 *) &cpuid);
2602 static void kvm_s390_crypto_init(struct kvm *kvm)
2604 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2605 kvm_s390_set_crycb_format(kvm);
2607 if (!test_kvm_facility(kvm, 76))
2610 /* Enable AES/DEA protected key functions by default */
2611 kvm->arch.crypto.aes_kw = 1;
2612 kvm->arch.crypto.dea_kw = 1;
2613 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2614 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2615 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2616 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2619 static void sca_dispose(struct kvm *kvm)
2621 if (kvm->arch.use_esca)
2622 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2624 free_page((unsigned long)(kvm->arch.sca));
2625 kvm->arch.sca = NULL;
2628 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2630 gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2632 char debug_name[16];
2633 static unsigned long sca_offset;
2636 #ifdef CONFIG_KVM_S390_UCONTROL
2637 if (type & ~KVM_VM_S390_UCONTROL)
2639 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2646 rc = s390_enable_sie();
2652 if (!sclp.has_64bscao)
2653 alloc_flags |= GFP_DMA;
2654 rwlock_init(&kvm->arch.sca_lock);
2655 /* start with basic SCA */
2656 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2659 mutex_lock(&kvm_lock);
2661 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2663 kvm->arch.sca = (struct bsca_block *)
2664 ((char *) kvm->arch.sca + sca_offset);
2665 mutex_unlock(&kvm_lock);
2667 sprintf(debug_name, "kvm-%u", current->pid);
2669 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2673 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2674 kvm->arch.sie_page2 =
2675 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2676 if (!kvm->arch.sie_page2)
2679 kvm->arch.sie_page2->kvm = kvm;
2680 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2682 for (i = 0; i < kvm_s390_fac_size(); i++) {
2683 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2684 (kvm_s390_fac_base[i] |
2685 kvm_s390_fac_ext[i]);
2686 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2687 kvm_s390_fac_base[i];
2689 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2691 /* we are always in czam mode - even on pre z14 machines */
2692 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2693 set_kvm_facility(kvm->arch.model.fac_list, 138);
2694 /* we emulate STHYI in kvm */
2695 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2696 set_kvm_facility(kvm->arch.model.fac_list, 74);
2697 if (MACHINE_HAS_TLB_GUEST) {
2698 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2699 set_kvm_facility(kvm->arch.model.fac_list, 147);
2702 if (css_general_characteristics.aiv && test_facility(65))
2703 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2705 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2706 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2708 kvm_s390_crypto_init(kvm);
2710 mutex_init(&kvm->arch.float_int.ais_lock);
2711 spin_lock_init(&kvm->arch.float_int.lock);
2712 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2713 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2714 init_waitqueue_head(&kvm->arch.ipte_wq);
2715 mutex_init(&kvm->arch.ipte_mutex);
2717 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2718 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2720 if (type & KVM_VM_S390_UCONTROL) {
2721 kvm->arch.gmap = NULL;
2722 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2724 if (sclp.hamax == U64_MAX)
2725 kvm->arch.mem_limit = TASK_SIZE_MAX;
2727 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2729 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2730 if (!kvm->arch.gmap)
2732 kvm->arch.gmap->private = kvm;
2733 kvm->arch.gmap->pfault_enabled = 0;
2736 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2737 kvm->arch.use_skf = sclp.has_skey;
2738 spin_lock_init(&kvm->arch.start_stop_lock);
2739 kvm_s390_vsie_init(kvm);
2741 kvm_s390_gisa_init(kvm);
2742 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2746 free_page((unsigned long)kvm->arch.sie_page2);
2747 debug_unregister(kvm->arch.dbf);
2749 KVM_EVENT(3, "creation of vm failed: %d", rc);
2753 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2757 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2758 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2759 kvm_s390_clear_local_irqs(vcpu);
2760 kvm_clear_async_pf_completion_queue(vcpu);
2761 if (!kvm_is_ucontrol(vcpu->kvm))
2764 if (kvm_is_ucontrol(vcpu->kvm))
2765 gmap_remove(vcpu->arch.gmap);
2767 if (vcpu->kvm->arch.use_cmma)
2768 kvm_s390_vcpu_unsetup_cmma(vcpu);
2769 /* We can not hold the vcpu mutex here, we are already dying */
2770 if (kvm_s390_pv_cpu_get_handle(vcpu))
2771 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2772 free_page((unsigned long)(vcpu->arch.sie_block));
2775 static void kvm_free_vcpus(struct kvm *kvm)
2778 struct kvm_vcpu *vcpu;
2780 kvm_for_each_vcpu(i, vcpu, kvm)
2781 kvm_vcpu_destroy(vcpu);
2783 mutex_lock(&kvm->lock);
2784 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2785 kvm->vcpus[i] = NULL;
2787 atomic_set(&kvm->online_vcpus, 0);
2788 mutex_unlock(&kvm->lock);
2791 void kvm_arch_destroy_vm(struct kvm *kvm)
2795 kvm_free_vcpus(kvm);
2797 kvm_s390_gisa_destroy(kvm);
2799 * We are already at the end of life and kvm->lock is not taken.
2800 * This is ok as the file descriptor is closed by now and nobody
2801 * can mess with the pv state. To avoid lockdep_assert_held from
2802 * complaining we do not use kvm_s390_pv_is_protected.
2804 if (kvm_s390_pv_get_handle(kvm))
2805 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2806 debug_unregister(kvm->arch.dbf);
2807 free_page((unsigned long)kvm->arch.sie_page2);
2808 if (!kvm_is_ucontrol(kvm))
2809 gmap_remove(kvm->arch.gmap);
2810 kvm_s390_destroy_adapters(kvm);
2811 kvm_s390_clear_float_irqs(kvm);
2812 kvm_s390_vsie_destroy(kvm);
2813 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2816 /* Section: vcpu related */
2817 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2819 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2820 if (!vcpu->arch.gmap)
2822 vcpu->arch.gmap->private = vcpu->kvm;
2827 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2829 if (!kvm_s390_use_sca_entries())
2831 read_lock(&vcpu->kvm->arch.sca_lock);
2832 if (vcpu->kvm->arch.use_esca) {
2833 struct esca_block *sca = vcpu->kvm->arch.sca;
2835 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2836 sca->cpu[vcpu->vcpu_id].sda = 0;
2838 struct bsca_block *sca = vcpu->kvm->arch.sca;
2840 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2841 sca->cpu[vcpu->vcpu_id].sda = 0;
2843 read_unlock(&vcpu->kvm->arch.sca_lock);
2846 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2848 if (!kvm_s390_use_sca_entries()) {
2849 struct bsca_block *sca = vcpu->kvm->arch.sca;
2851 /* we still need the basic sca for the ipte control */
2852 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2853 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2856 read_lock(&vcpu->kvm->arch.sca_lock);
2857 if (vcpu->kvm->arch.use_esca) {
2858 struct esca_block *sca = vcpu->kvm->arch.sca;
2860 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2861 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2862 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2863 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2864 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2866 struct bsca_block *sca = vcpu->kvm->arch.sca;
2868 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2869 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2870 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2871 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2873 read_unlock(&vcpu->kvm->arch.sca_lock);
2876 /* Basic SCA to Extended SCA data copy routines */
2877 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2880 d->sigp_ctrl.c = s->sigp_ctrl.c;
2881 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2884 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2888 d->ipte_control = s->ipte_control;
2890 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2891 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2894 static int sca_switch_to_extended(struct kvm *kvm)
2896 struct bsca_block *old_sca = kvm->arch.sca;
2897 struct esca_block *new_sca;
2898 struct kvm_vcpu *vcpu;
2899 unsigned int vcpu_idx;
2902 if (kvm->arch.use_esca)
2905 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2909 scaoh = (u32)((u64)(new_sca) >> 32);
2910 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2912 kvm_s390_vcpu_block_all(kvm);
2913 write_lock(&kvm->arch.sca_lock);
2915 sca_copy_b_to_e(new_sca, old_sca);
2917 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2918 vcpu->arch.sie_block->scaoh = scaoh;
2919 vcpu->arch.sie_block->scaol = scaol;
2920 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2922 kvm->arch.sca = new_sca;
2923 kvm->arch.use_esca = 1;
2925 write_unlock(&kvm->arch.sca_lock);
2926 kvm_s390_vcpu_unblock_all(kvm);
2928 free_page((unsigned long)old_sca);
2930 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2931 old_sca, kvm->arch.sca);
2935 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2939 if (!kvm_s390_use_sca_entries()) {
2940 if (id < KVM_MAX_VCPUS)
2944 if (id < KVM_S390_BSCA_CPU_SLOTS)
2946 if (!sclp.has_esca || !sclp.has_64bscao)
2949 mutex_lock(&kvm->lock);
2950 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2951 mutex_unlock(&kvm->lock);
2953 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2956 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2957 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2959 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2960 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2961 vcpu->arch.cputm_start = get_tod_clock_fast();
2962 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2965 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2966 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2968 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2969 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2970 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2971 vcpu->arch.cputm_start = 0;
2972 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2975 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2976 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2978 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2979 vcpu->arch.cputm_enabled = true;
2980 __start_cpu_timer_accounting(vcpu);
2983 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2984 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2986 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2987 __stop_cpu_timer_accounting(vcpu);
2988 vcpu->arch.cputm_enabled = false;
2991 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2993 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2994 __enable_cpu_timer_accounting(vcpu);
2998 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3000 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3001 __disable_cpu_timer_accounting(vcpu);
3005 /* set the cpu timer - may only be called from the VCPU thread itself */
3006 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3008 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3009 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3010 if (vcpu->arch.cputm_enabled)
3011 vcpu->arch.cputm_start = get_tod_clock_fast();
3012 vcpu->arch.sie_block->cputm = cputm;
3013 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3017 /* update and get the cpu timer - can also be called from other VCPU threads */
3018 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3023 if (unlikely(!vcpu->arch.cputm_enabled))
3024 return vcpu->arch.sie_block->cputm;
3026 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3028 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3030 * If the writer would ever execute a read in the critical
3031 * section, e.g. in irq context, we have a deadlock.
3033 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3034 value = vcpu->arch.sie_block->cputm;
3035 /* if cputm_start is 0, accounting is being started/stopped */
3036 if (likely(vcpu->arch.cputm_start))
3037 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3038 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3043 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3046 gmap_enable(vcpu->arch.enabled_gmap);
3047 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3048 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3049 __start_cpu_timer_accounting(vcpu);
3053 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3056 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3057 __stop_cpu_timer_accounting(vcpu);
3058 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3059 vcpu->arch.enabled_gmap = gmap_get_enabled();
3060 gmap_disable(vcpu->arch.enabled_gmap);
3064 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3066 mutex_lock(&vcpu->kvm->lock);
3068 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3069 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3071 mutex_unlock(&vcpu->kvm->lock);
3072 if (!kvm_is_ucontrol(vcpu->kvm)) {
3073 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3076 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3077 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3078 /* make vcpu_load load the right gmap on the first trigger */
3079 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3082 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3084 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3085 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3090 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3092 /* At least one ECC subfunction must be present */
3093 return kvm_has_pckmo_subfunc(kvm, 32) ||
3094 kvm_has_pckmo_subfunc(kvm, 33) ||
3095 kvm_has_pckmo_subfunc(kvm, 34) ||
3096 kvm_has_pckmo_subfunc(kvm, 40) ||
3097 kvm_has_pckmo_subfunc(kvm, 41);
3101 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3104 * If the AP instructions are not being interpreted and the MSAX3
3105 * facility is not configured for the guest, there is nothing to set up.
3107 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3110 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3111 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3112 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3113 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3115 if (vcpu->kvm->arch.crypto.apie)
3116 vcpu->arch.sie_block->eca |= ECA_APIE;
3118 /* Set up protected key support */
3119 if (vcpu->kvm->arch.crypto.aes_kw) {
3120 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3121 /* ecc is also wrapped with AES key */
3122 if (kvm_has_pckmo_ecc(vcpu->kvm))
3123 vcpu->arch.sie_block->ecd |= ECD_ECC;
3126 if (vcpu->kvm->arch.crypto.dea_kw)
3127 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3130 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3132 free_page(vcpu->arch.sie_block->cbrlo);
3133 vcpu->arch.sie_block->cbrlo = 0;
3136 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3138 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3139 if (!vcpu->arch.sie_block->cbrlo)
3144 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3146 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3148 vcpu->arch.sie_block->ibc = model->ibc;
3149 if (test_kvm_facility(vcpu->kvm, 7))
3150 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3153 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3158 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3162 if (test_kvm_facility(vcpu->kvm, 78))
3163 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3164 else if (test_kvm_facility(vcpu->kvm, 8))
3165 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3167 kvm_s390_vcpu_setup_model(vcpu);
3169 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3170 if (MACHINE_HAS_ESOP)
3171 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3172 if (test_kvm_facility(vcpu->kvm, 9))
3173 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3174 if (test_kvm_facility(vcpu->kvm, 73))
3175 vcpu->arch.sie_block->ecb |= ECB_TE;
3177 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3178 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3179 if (test_kvm_facility(vcpu->kvm, 130))
3180 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3181 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3183 vcpu->arch.sie_block->eca |= ECA_CEI;
3185 vcpu->arch.sie_block->eca |= ECA_IB;
3187 vcpu->arch.sie_block->eca |= ECA_SII;
3188 if (sclp.has_sigpif)
3189 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3190 if (test_kvm_facility(vcpu->kvm, 129)) {
3191 vcpu->arch.sie_block->eca |= ECA_VX;
3192 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3194 if (test_kvm_facility(vcpu->kvm, 139))
3195 vcpu->arch.sie_block->ecd |= ECD_MEF;
3196 if (test_kvm_facility(vcpu->kvm, 156))
3197 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3198 if (vcpu->arch.sie_block->gd) {
3199 vcpu->arch.sie_block->eca |= ECA_AIV;
3200 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3201 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3203 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3205 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3208 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3210 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3212 if (vcpu->kvm->arch.use_cmma) {
3213 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3217 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3218 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3220 vcpu->arch.sie_block->hpid = HPID_KVM;
3222 kvm_s390_vcpu_crypto_setup(vcpu);
3224 mutex_lock(&vcpu->kvm->lock);
3225 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3226 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3228 kvm_s390_vcpu_unsetup_cmma(vcpu);
3230 mutex_unlock(&vcpu->kvm->lock);
3235 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3237 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3242 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3244 struct sie_page *sie_page;
3247 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3248 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3252 vcpu->arch.sie_block = &sie_page->sie_block;
3253 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3255 /* the real guest size will always be smaller than msl */
3256 vcpu->arch.sie_block->mso = 0;
3257 vcpu->arch.sie_block->msl = sclp.hamax;
3259 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3260 spin_lock_init(&vcpu->arch.local_int.lock);
3261 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3262 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3263 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3264 seqcount_init(&vcpu->arch.cputm_seqcount);
3266 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3267 kvm_clear_async_pf_completion_queue(vcpu);
3268 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3275 kvm_s390_set_prefix(vcpu, 0);
3276 if (test_kvm_facility(vcpu->kvm, 64))
3277 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3278 if (test_kvm_facility(vcpu->kvm, 82))
3279 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3280 if (test_kvm_facility(vcpu->kvm, 133))
3281 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3282 if (test_kvm_facility(vcpu->kvm, 156))
3283 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3284 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3285 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3288 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3290 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3292 if (kvm_is_ucontrol(vcpu->kvm)) {
3293 rc = __kvm_ucontrol_vcpu_init(vcpu);
3295 goto out_free_sie_block;
3298 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3299 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3300 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3302 rc = kvm_s390_vcpu_setup(vcpu);
3304 goto out_ucontrol_uninit;
3307 out_ucontrol_uninit:
3308 if (kvm_is_ucontrol(vcpu->kvm))
3309 gmap_remove(vcpu->arch.gmap);
3311 free_page((unsigned long)(vcpu->arch.sie_block));
3315 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3317 return kvm_s390_vcpu_has_irq(vcpu, 0);
3320 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3322 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3325 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3327 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3331 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3333 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3336 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3338 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3342 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3344 return atomic_read(&vcpu->arch.sie_block->prog20) &
3345 (PROG_BLOCK_SIE | PROG_REQUEST);
3348 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3350 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3354 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3355 * If the CPU is not running (e.g. waiting as idle) the function will
3356 * return immediately. */
3357 void exit_sie(struct kvm_vcpu *vcpu)
3359 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3360 kvm_s390_vsie_kick(vcpu);
3361 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3365 /* Kick a guest cpu out of SIE to process a request synchronously */
3366 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3368 kvm_make_request(req, vcpu);
3369 kvm_s390_vcpu_request(vcpu);
3372 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3375 struct kvm *kvm = gmap->private;
3376 struct kvm_vcpu *vcpu;
3377 unsigned long prefix;
3380 if (gmap_is_shadow(gmap))
3382 if (start >= 1UL << 31)
3383 /* We are only interested in prefix pages */
3385 kvm_for_each_vcpu(i, vcpu, kvm) {
3386 /* match against both prefix pages */
3387 prefix = kvm_s390_get_prefix(vcpu);
3388 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3389 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3391 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3396 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3398 /* do not poll with more than halt_poll_max_steal percent of steal time */
3399 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3400 halt_poll_max_steal) {
3401 vcpu->stat.halt_no_poll_steal++;
3407 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3409 /* kvm common code refers to this, but never calls it */
3414 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3415 struct kvm_one_reg *reg)
3420 case KVM_REG_S390_TODPR:
3421 r = put_user(vcpu->arch.sie_block->todpr,
3422 (u32 __user *)reg->addr);
3424 case KVM_REG_S390_EPOCHDIFF:
3425 r = put_user(vcpu->arch.sie_block->epoch,
3426 (u64 __user *)reg->addr);
3428 case KVM_REG_S390_CPU_TIMER:
3429 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3430 (u64 __user *)reg->addr);
3432 case KVM_REG_S390_CLOCK_COMP:
3433 r = put_user(vcpu->arch.sie_block->ckc,
3434 (u64 __user *)reg->addr);
3436 case KVM_REG_S390_PFTOKEN:
3437 r = put_user(vcpu->arch.pfault_token,
3438 (u64 __user *)reg->addr);
3440 case KVM_REG_S390_PFCOMPARE:
3441 r = put_user(vcpu->arch.pfault_compare,
3442 (u64 __user *)reg->addr);
3444 case KVM_REG_S390_PFSELECT:
3445 r = put_user(vcpu->arch.pfault_select,
3446 (u64 __user *)reg->addr);
3448 case KVM_REG_S390_PP:
3449 r = put_user(vcpu->arch.sie_block->pp,
3450 (u64 __user *)reg->addr);
3452 case KVM_REG_S390_GBEA:
3453 r = put_user(vcpu->arch.sie_block->gbea,
3454 (u64 __user *)reg->addr);
3463 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3464 struct kvm_one_reg *reg)
3470 case KVM_REG_S390_TODPR:
3471 r = get_user(vcpu->arch.sie_block->todpr,
3472 (u32 __user *)reg->addr);
3474 case KVM_REG_S390_EPOCHDIFF:
3475 r = get_user(vcpu->arch.sie_block->epoch,
3476 (u64 __user *)reg->addr);
3478 case KVM_REG_S390_CPU_TIMER:
3479 r = get_user(val, (u64 __user *)reg->addr);
3481 kvm_s390_set_cpu_timer(vcpu, val);
3483 case KVM_REG_S390_CLOCK_COMP:
3484 r = get_user(vcpu->arch.sie_block->ckc,
3485 (u64 __user *)reg->addr);
3487 case KVM_REG_S390_PFTOKEN:
3488 r = get_user(vcpu->arch.pfault_token,
3489 (u64 __user *)reg->addr);
3490 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3491 kvm_clear_async_pf_completion_queue(vcpu);
3493 case KVM_REG_S390_PFCOMPARE:
3494 r = get_user(vcpu->arch.pfault_compare,
3495 (u64 __user *)reg->addr);
3497 case KVM_REG_S390_PFSELECT:
3498 r = get_user(vcpu->arch.pfault_select,
3499 (u64 __user *)reg->addr);
3501 case KVM_REG_S390_PP:
3502 r = get_user(vcpu->arch.sie_block->pp,
3503 (u64 __user *)reg->addr);
3505 case KVM_REG_S390_GBEA:
3506 r = get_user(vcpu->arch.sie_block->gbea,
3507 (u64 __user *)reg->addr);
3516 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3518 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3519 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3520 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3522 kvm_clear_async_pf_completion_queue(vcpu);
3523 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3524 kvm_s390_vcpu_stop(vcpu);
3525 kvm_s390_clear_local_irqs(vcpu);
3528 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3530 /* Initial reset is a superset of the normal reset */
3531 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3534 * This equals initial cpu reset in pop, but we don't switch to ESA.
3535 * We do not only reset the internal data, but also ...
3537 vcpu->arch.sie_block->gpsw.mask = 0;
3538 vcpu->arch.sie_block->gpsw.addr = 0;
3539 kvm_s390_set_prefix(vcpu, 0);
3540 kvm_s390_set_cpu_timer(vcpu, 0);
3541 vcpu->arch.sie_block->ckc = 0;
3542 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3543 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3544 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3546 /* ... the data in sync regs */
3547 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3548 vcpu->run->s.regs.ckc = 0;
3549 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3550 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3551 vcpu->run->psw_addr = 0;
3552 vcpu->run->psw_mask = 0;
3553 vcpu->run->s.regs.todpr = 0;
3554 vcpu->run->s.regs.cputm = 0;
3555 vcpu->run->s.regs.ckc = 0;
3556 vcpu->run->s.regs.pp = 0;
3557 vcpu->run->s.regs.gbea = 1;
3558 vcpu->run->s.regs.fpc = 0;
3560 * Do not reset these registers in the protected case, as some of
3561 * them are overlayed and they are not accessible in this case
3564 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3565 vcpu->arch.sie_block->gbea = 1;
3566 vcpu->arch.sie_block->pp = 0;
3567 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3568 vcpu->arch.sie_block->todpr = 0;
3572 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3574 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3576 /* Clear reset is a superset of the initial reset */
3577 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3579 memset(®s->gprs, 0, sizeof(regs->gprs));
3580 memset(®s->vrs, 0, sizeof(regs->vrs));
3581 memset(®s->acrs, 0, sizeof(regs->acrs));
3582 memset(®s->gscb, 0, sizeof(regs->gscb));
3585 regs->etoken_extension = 0;
3588 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3591 memcpy(&vcpu->run->s.regs.gprs, ®s->gprs, sizeof(regs->gprs));
3596 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3599 memcpy(®s->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3604 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3605 struct kvm_sregs *sregs)
3609 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3610 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3616 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3617 struct kvm_sregs *sregs)
3621 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3622 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3628 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3634 if (test_fp_ctl(fpu->fpc)) {
3638 vcpu->run->s.regs.fpc = fpu->fpc;
3640 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3641 (freg_t *) fpu->fprs);
3643 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3650 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3654 /* make sure we have the latest values */
3657 convert_vx_to_fp((freg_t *) fpu->fprs,
3658 (__vector128 *) vcpu->run->s.regs.vrs);
3660 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3661 fpu->fpc = vcpu->run->s.regs.fpc;
3667 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3671 if (!is_vcpu_stopped(vcpu))
3674 vcpu->run->psw_mask = psw.mask;
3675 vcpu->run->psw_addr = psw.addr;
3680 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3681 struct kvm_translation *tr)
3683 return -EINVAL; /* not implemented yet */
3686 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3687 KVM_GUESTDBG_USE_HW_BP | \
3688 KVM_GUESTDBG_ENABLE)
3690 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3691 struct kvm_guest_debug *dbg)
3697 vcpu->guest_debug = 0;
3698 kvm_s390_clear_bp_data(vcpu);
3700 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3704 if (!sclp.has_gpere) {
3709 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3710 vcpu->guest_debug = dbg->control;
3711 /* enforce guest PER */
3712 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3714 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3715 rc = kvm_s390_import_bp_data(vcpu, dbg);
3717 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3718 vcpu->arch.guestdbg.last_bp = 0;
3722 vcpu->guest_debug = 0;
3723 kvm_s390_clear_bp_data(vcpu);
3724 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3732 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3733 struct kvm_mp_state *mp_state)
3739 /* CHECK_STOP and LOAD are not supported yet */
3740 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3741 KVM_MP_STATE_OPERATING;
3747 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3748 struct kvm_mp_state *mp_state)
3754 /* user space knows about this interface - let it control the state */
3755 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3757 switch (mp_state->mp_state) {
3758 case KVM_MP_STATE_STOPPED:
3759 rc = kvm_s390_vcpu_stop(vcpu);
3761 case KVM_MP_STATE_OPERATING:
3762 rc = kvm_s390_vcpu_start(vcpu);
3764 case KVM_MP_STATE_LOAD:
3765 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3769 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3771 case KVM_MP_STATE_CHECK_STOP:
3772 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3781 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3783 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3786 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3789 kvm_s390_vcpu_request_handled(vcpu);
3790 if (!kvm_request_pending(vcpu))
3793 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3794 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3795 * This ensures that the ipte instruction for this request has
3796 * already finished. We might race against a second unmapper that
3797 * wants to set the blocking bit. Lets just retry the request loop.
3799 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3801 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3802 kvm_s390_get_prefix(vcpu),
3803 PAGE_SIZE * 2, PROT_WRITE);
3805 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3811 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3812 vcpu->arch.sie_block->ihcpu = 0xffff;
3816 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3817 if (!ibs_enabled(vcpu)) {
3818 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3819 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3824 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3825 if (ibs_enabled(vcpu)) {
3826 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3827 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3832 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3833 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3837 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3839 * Disable CMM virtualization; we will emulate the ESSA
3840 * instruction manually, in order to provide additional
3841 * functionalities needed for live migration.
3843 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3847 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3849 * Re-enable CMM virtualization if CMMA is available and
3850 * CMM has been used.
3852 if ((vcpu->kvm->arch.use_cmma) &&
3853 (vcpu->kvm->mm->context.uses_cmm))
3854 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3858 /* nothing to do, just clear the request */
3859 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3860 /* we left the vsie handler, nothing to do, just clear the request */
3861 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3866 void kvm_s390_set_tod_clock(struct kvm *kvm,
3867 const struct kvm_s390_vm_tod_clock *gtod)
3869 struct kvm_vcpu *vcpu;
3870 struct kvm_s390_tod_clock_ext htod;
3873 mutex_lock(&kvm->lock);
3876 get_tod_clock_ext((char *)&htod);
3878 kvm->arch.epoch = gtod->tod - htod.tod;
3880 if (test_kvm_facility(kvm, 139)) {
3881 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3882 if (kvm->arch.epoch > gtod->tod)
3883 kvm->arch.epdx -= 1;
3886 kvm_s390_vcpu_block_all(kvm);
3887 kvm_for_each_vcpu(i, vcpu, kvm) {
3888 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3889 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3892 kvm_s390_vcpu_unblock_all(kvm);
3894 mutex_unlock(&kvm->lock);
3898 * kvm_arch_fault_in_page - fault-in guest page if necessary
3899 * @vcpu: The corresponding virtual cpu
3900 * @gpa: Guest physical address
3901 * @writable: Whether the page should be writable or not
3903 * Make sure that a guest page has been faulted-in on the host.
3905 * Return: Zero on success, negative error code otherwise.
3907 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3909 return gmap_fault(vcpu->arch.gmap, gpa,
3910 writable ? FAULT_FLAG_WRITE : 0);
3913 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3914 unsigned long token)
3916 struct kvm_s390_interrupt inti;
3917 struct kvm_s390_irq irq;
3920 irq.u.ext.ext_params2 = token;
3921 irq.type = KVM_S390_INT_PFAULT_INIT;
3922 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3924 inti.type = KVM_S390_INT_PFAULT_DONE;
3925 inti.parm64 = token;
3926 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3930 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3931 struct kvm_async_pf *work)
3933 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3934 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3939 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3940 struct kvm_async_pf *work)
3942 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3943 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3946 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3947 struct kvm_async_pf *work)
3949 /* s390 will always inject the page directly */
3952 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3955 * s390 will always inject the page directly,
3956 * but we still want check_async_completion to cleanup
3961 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3964 struct kvm_arch_async_pf arch;
3966 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3968 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3969 vcpu->arch.pfault_compare)
3971 if (psw_extint_disabled(vcpu))
3973 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3975 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3977 if (!vcpu->arch.gmap->pfault_enabled)
3980 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3981 hva += current->thread.gmap_addr & ~PAGE_MASK;
3982 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3985 return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3988 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3993 * On s390 notifications for arriving pages will be delivered directly
3994 * to the guest but the house keeping for completed pfaults is
3995 * handled outside the worker.
3997 kvm_check_async_pf_completion(vcpu);
3999 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4000 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4005 if (!kvm_is_ucontrol(vcpu->kvm)) {
4006 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4011 rc = kvm_s390_handle_requests(vcpu);
4015 if (guestdbg_enabled(vcpu)) {
4016 kvm_s390_backup_guest_per_regs(vcpu);
4017 kvm_s390_patch_guest_per_regs(vcpu);
4020 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4022 vcpu->arch.sie_block->icptcode = 0;
4023 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4024 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4025 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4030 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4032 struct kvm_s390_pgm_info pgm_info = {
4033 .code = PGM_ADDRESSING,
4038 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4039 trace_kvm_s390_sie_fault(vcpu);
4042 * We want to inject an addressing exception, which is defined as a
4043 * suppressing or terminating exception. However, since we came here
4044 * by a DAT access exception, the PSW still points to the faulting
4045 * instruction since DAT exceptions are nullifying. So we've got
4046 * to look up the current opcode to get the length of the instruction
4047 * to be able to forward the PSW.
4049 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4050 ilen = insn_length(opcode);
4054 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4055 * Forward by arbitrary ilc, injection will take care of
4056 * nullification if necessary.
4058 pgm_info = vcpu->arch.pgm;
4061 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4062 kvm_s390_forward_psw(vcpu, ilen);
4063 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4066 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4068 struct mcck_volatile_info *mcck_info;
4069 struct sie_page *sie_page;
4071 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4072 vcpu->arch.sie_block->icptcode);
4073 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4075 if (guestdbg_enabled(vcpu))
4076 kvm_s390_restore_guest_per_regs(vcpu);
4078 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4079 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4081 if (exit_reason == -EINTR) {
4082 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4083 sie_page = container_of(vcpu->arch.sie_block,
4084 struct sie_page, sie_block);
4085 mcck_info = &sie_page->mcck_info;
4086 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4090 if (vcpu->arch.sie_block->icptcode > 0) {
4091 int rc = kvm_handle_sie_intercept(vcpu);
4093 if (rc != -EOPNOTSUPP)
4095 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4096 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4097 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4098 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4100 } else if (exit_reason != -EFAULT) {
4101 vcpu->stat.exit_null++;
4103 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4104 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4105 vcpu->run->s390_ucontrol.trans_exc_code =
4106 current->thread.gmap_addr;
4107 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4109 } else if (current->thread.gmap_pfault) {
4110 trace_kvm_s390_major_guest_pfault(vcpu);
4111 current->thread.gmap_pfault = 0;
4112 if (kvm_arch_setup_async_pf(vcpu))
4114 vcpu->stat.pfault_sync++;
4115 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4117 return vcpu_post_run_fault_in_sie(vcpu);
4120 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4121 static int __vcpu_run(struct kvm_vcpu *vcpu)
4123 int rc, exit_reason;
4124 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4127 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4128 * ning the guest), so that memslots (and other stuff) are protected
4130 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4133 rc = vcpu_pre_run(vcpu);
4137 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4139 * As PF_VCPU will be used in fault handler, between
4140 * guest_enter and guest_exit should be no uaccess.
4142 local_irq_disable();
4143 guest_enter_irqoff();
4144 __disable_cpu_timer_accounting(vcpu);
4146 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4147 memcpy(sie_page->pv_grregs,
4148 vcpu->run->s.regs.gprs,
4149 sizeof(sie_page->pv_grregs));
4151 if (test_cpu_flag(CIF_FPU))
4153 exit_reason = sie64a(vcpu->arch.sie_block,
4154 vcpu->run->s.regs.gprs);
4155 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4156 memcpy(vcpu->run->s.regs.gprs,
4157 sie_page->pv_grregs,
4158 sizeof(sie_page->pv_grregs));
4160 * We're not allowed to inject interrupts on intercepts
4161 * that leave the guest state in an "in-between" state
4162 * where the next SIE entry will do a continuation.
4163 * Fence interrupts in our "internal" PSW.
4165 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4166 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4167 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4170 local_irq_disable();
4171 __enable_cpu_timer_accounting(vcpu);
4172 guest_exit_irqoff();
4174 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4176 rc = vcpu_post_run(vcpu, exit_reason);
4177 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4179 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4183 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4185 struct kvm_run *kvm_run = vcpu->run;
4186 struct runtime_instr_cb *riccb;
4189 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4190 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4191 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4192 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4193 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4194 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4195 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4196 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4198 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4199 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4200 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4201 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4202 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4203 kvm_clear_async_pf_completion_queue(vcpu);
4205 if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4206 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4207 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4210 * If userspace sets the riccb (e.g. after migration) to a valid state,
4211 * we should enable RI here instead of doing the lazy enablement.
4213 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4214 test_kvm_facility(vcpu->kvm, 64) &&
4216 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4217 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4218 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4221 * If userspace sets the gscb (e.g. after migration) to non-zero,
4222 * we should enable GS here instead of doing the lazy enablement.
4224 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4225 test_kvm_facility(vcpu->kvm, 133) &&
4227 !vcpu->arch.gs_enabled) {
4228 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4229 vcpu->arch.sie_block->ecb |= ECB_GS;
4230 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4231 vcpu->arch.gs_enabled = 1;
4233 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4234 test_kvm_facility(vcpu->kvm, 82)) {
4235 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4236 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4238 if (MACHINE_HAS_GS) {
4240 __ctl_set_bit(2, 4);
4241 if (current->thread.gs_cb) {
4242 vcpu->arch.host_gscb = current->thread.gs_cb;
4243 save_gs_cb(vcpu->arch.host_gscb);
4245 if (vcpu->arch.gs_enabled) {
4246 current->thread.gs_cb = (struct gs_cb *)
4247 &vcpu->run->s.regs.gscb;
4248 restore_gs_cb(current->thread.gs_cb);
4252 /* SIE will load etoken directly from SDNX and therefore kvm_run */
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}

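/*
 * Example (illustrative userspace sketch, not part of the build): the
 * sync_regs()/store_regs() pair implements the kvm_run register
 * synchronization protocol.  Userspace flags what it changed in
 * kvm_dirty_regs before KVM_RUN; anything not flagged keeps the
 * in-kernel value, while the PSW in psw_mask/psw_addr is always synced
 * both ways.  Assuming vcpu_fd and the mmap()ed kvm_run page:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int set_prefix_and_run(int vcpu_fd, struct kvm_run *run,
 *				      __u32 new_prefix)
 *	{
 *		run->s.regs.prefix = new_prefix;
 *		run->kvm_dirty_regs = KVM_SYNC_PREFIX;
 *		return ioctl(vcpu_fd, KVM_RUN, 0);
 *	}
 */
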
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}

static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	/*
	 * No need to check the return value of kvm_s390_vcpu_start() as it
	 * can only fail for protvirt, but protvirt means user cpu state
	 * control, so this branch is never taken for a protected guest.
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

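/*
 * Example (illustrative userspace sketch, not part of the build): the
 * typical caller loops on KVM_RUN and treats EINTR/KVM_EXIT_INTR as
 * "back in userspace because a signal is pending", mirroring the
 * signal_pending() handling above.  vcpu_fd, run and handle_exit() are
 * assumed; a real loop must also let the pending signal be delivered
 * before re-entering:
 *
 *	#include <errno.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int run_vcpu(int vcpu_fd, struct kvm_run *run)
 *	{
 *		for (;;) {
 *			if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
 *				if (errno == EINTR)
 *					continue;
 *				return -errno;
 *			}
 *			if (run->exit_reason == KVM_EXIT_INTR)
 *				continue;
 *			return handle_exit(run);
 *		}
 *	}
 */
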
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

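/*
 * Example (illustrative userspace sketch): KVM_S390_STORE_STATUS takes
 * the target address directly as the ioctl argument; assuming vcpu_fd,
 * the two special cases handled above are requested as:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 *
 * (If the two macros are not visible to userspace in your environment,
 * current kernels define them as -1ul and -2ul respectively.)
 */
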
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

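/*
 * Example (illustrative userspace sketch): enabling CSS support, which
 * makes the guest's channel I/O instructions intercept to userspace.
 * vcpu_fd is assumed:
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap))
 *		perror("KVM_ENABLE_CAP");
 */
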
static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
				   struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	/* reject a size/offset combination that wraps around */
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
				 mop->sida_offset), mop->size))
			r = -EFAULT;
		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
				   mop->sida_offset), uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r = 0;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	/* a bounce buffer is only needed when data is actually copied */
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	}

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

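/*
 * Example (illustrative userspace sketch): reading guest memory via the
 * logical (dynamically translated) path above.  vcpu_fd and guest_addr
 * are assumed:
 *
 *	unsigned char buf[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size = sizeof(buf),
 *		.op = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf = (__u64)(unsigned long)buf,
 *		.ar = 0,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op))
 *		perror("KVM_S390_MEM_OP");
 *
 * With KVM_S390_MEMOP_F_CHECK_ONLY in .flags only the address/access
 * check is performed and no data is copied.
 */
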
static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
				      struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_guest_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_guest_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}

long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}

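/*
 * Example (illustrative userspace sketch): injecting a restart
 * interrupt through the async path above; restart carries no payload.
 * vcpu_fd is assumed:
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_RESTART,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq))
 *		perror("KVM_S390_IRQ");
 */
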
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}

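/*
 * Example (illustrative userspace sketch): saving and restoring pending
 * interrupts, e.g. around migration.  The length must be a non-zero
 * multiple of sizeof(struct kvm_s390_irq) and flags must remain zero,
 * as checked above; KVM_S390_GET_IRQ_STATE returns the number of bytes
 * written into the buffer.  vcpu_fd and dst_vcpu_fd are assumed:
 *
 *	struct kvm_s390_irq buf[16];
 *	struct kvm_s390_irq_state irq_state = {
 *		.buf = (__u64)(unsigned long)buf,
 *		.len = sizeof(buf),
 *	};
 *	int n = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
 *
 *	if (n > 0) {
 *		irq_state.len = n;
 *		ioctl(dst_vcpu_fd, KVM_S390_SET_IRQ_STATE, &irq_state);
 *	}
 */
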
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
	    && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

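/*
 * Example (illustrative userspace sketch): for user controlled VMs the
 * fault handler above exposes the hardware SIE control block, which can
 * be reached by mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET
 * pages.  vcpu_fd is assumed:
 *
 *	void *sie_block = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *			       MAP_SHARED, vcpu_fd,
 *			       KVM_S390_SIE_PAGE_OFFSET * 4096);
 */
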
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;
	return 0;
}

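/*
 * Example (illustrative userspace sketch): a memory slot that satisfies
 * the 1 MB segment alignment enforced above.  vm_fd and mem (a 1 MB
 * aligned mapping) are assumed:
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size = 256 << 20,
 *		.userspace_addr = (__u64)(unsigned long)mem,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region))
 *		perror("KVM_SET_USER_MEMORY_REGION");
 *
 * Note that userspace_addr itself must be 1 MB aligned as well, which
 * plain mmap() (page alignment only) does not guarantee.
 */
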
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

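/*
 * Worked example: for i == 0 and sclp.hmfai == 0x40000000 the top two
 * bits give nonhyp_fai == 1, so the result is
 * 0x0000ffffffffffffUL >> 16 == 0x00000000ffffffffUL; each increment
 * of the 2-bit field strips another 16 facility bits from the base
 * mask.
 */
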
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

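/*
 * Note: the nested/hpage conflict above is only hit when both module
 * parameters are set at load time (e.g. "modprobe kvm nested=1
 * hpage=1"); the module load then fails with EINVAL, while either
 * parameter alone is accepted.
 */
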
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");