// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
14 #include <linux/compiler.h>
15 #include <linux/err.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
35 #include <asm/pgtable.h>
38 #include <asm/switch_to.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
47 #define KMSG_COMPONENT "kvm-s390"
49 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
55 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
60 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
61 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64 { "userspace_handled", VCPU_STAT(exit_userspace) },
65 { "exit_null", VCPU_STAT(exit_null) },
66 { "exit_validity", VCPU_STAT(exit_validity) },
67 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
68 { "exit_external_request", VCPU_STAT(exit_external_request) },
69 { "exit_io_request", VCPU_STAT(exit_io_request) },
70 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 { "exit_instruction", VCPU_STAT(exit_instruction) },
72 { "exit_pei", VCPU_STAT(exit_pei) },
73 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85 { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89 { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93 { "deliver_program", VCPU_STAT(deliver_program) },
94 { "deliver_io", VCPU_STAT(deliver_io) },
95 { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97 { "inject_ckc", VCPU_STAT(inject_ckc) },
98 { "inject_cputm", VCPU_STAT(inject_cputm) },
99 { "inject_external_call", VCPU_STAT(inject_external_call) },
100 { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101 { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102 { "inject_io", VM_STAT(inject_io) },
103 { "inject_mchk", VCPU_STAT(inject_mchk) },
104 { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105 { "inject_program", VCPU_STAT(inject_program) },
106 { "inject_restart", VCPU_STAT(inject_restart) },
107 { "inject_service_signal", VM_STAT(inject_service_signal) },
108 { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109 { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110 { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111 { "inject_virtio", VM_STAT(inject_virtio) },
112 { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113 { "instruction_gs", VCPU_STAT(instruction_gs) },
114 { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115 { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116 { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118 { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120 { "instruction_sck", VCPU_STAT(instruction_sck) },
121 { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122 { "instruction_spx", VCPU_STAT(instruction_spx) },
123 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124 { "instruction_stap", VCPU_STAT(instruction_stap) },
125 { "instruction_iske", VCPU_STAT(instruction_iske) },
126 { "instruction_ri", VCPU_STAT(instruction_ri) },
127 { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128 { "instruction_sske", VCPU_STAT(instruction_sske) },
129 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130 { "instruction_essa", VCPU_STAT(instruction_essa) },
131 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133 { "instruction_tb", VCPU_STAT(instruction_tb) },
134 { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136 { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138 { "instruction_sie", VCPU_STAT(instruction_sie) },
139 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155 { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156 { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157 { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158 { "instruction_diag_258", VCPU_STAT(diagnose_258) },
159 { "instruction_diag_308", VCPU_STAT(diagnose_308) },
160 { "instruction_diag_500", VCPU_STAT(diagnose_500) },
161 { "instruction_diag_other", VCPU_STAT(diagnose_other) },
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16
/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
209 /* available cpu features supported by kvm */
210 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
211 /* available subfunctions indicated via query / "test bit" */
212 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
214 static struct gmap_notifier gmap_notifier;
215 static struct gmap_notifier vsie_gmap_notifier;
216 debug_info_t *kvm_s390_dbf;
218 /* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
440 /* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
554 /* Section: vm related */
555 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
839 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
1662 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1663 /* for consistency */
1664 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
1919 unsigned int ioctl, unsigned long arg)
1921 struct kvm *kvm = filp->private_data;
1922 void __user *argp = (void __user *)arg;
1923 struct kvm_device_attr attr;
1927 case KVM_S390_INTERRUPT: {
1928 struct kvm_s390_interrupt s390int;
1931 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1933 r = kvm_s390_inject_vm(kvm, &s390int);
1936 case KVM_ENABLE_CAP: {
1937 struct kvm_enable_cap cap;
1939 if (copy_from_user(&cap, argp, sizeof(cap)))
1941 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1944 case KVM_CREATE_IRQCHIP: {
1945 struct kvm_irq_routing_entry routing;
1948 if (kvm->arch.use_irqchip) {
1949 /* Set up dummy routing. */
1950 memset(&routing, 0, sizeof(routing));
1951 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1955 case KVM_SET_DEVICE_ATTR: {
1957 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1959 r = kvm_s390_vm_set_attr(kvm, &attr);
1962 case KVM_GET_DEVICE_ATTR: {
1964 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1966 r = kvm_s390_vm_get_attr(kvm, &attr);
1969 case KVM_HAS_DEVICE_ATTR: {
1971 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1973 r = kvm_s390_vm_has_attr(kvm, &attr);
1976 case KVM_S390_GET_SKEYS: {
1977 struct kvm_s390_skeys args;
1980 if (copy_from_user(&args, argp,
1981 sizeof(struct kvm_s390_skeys)))
1983 r = kvm_s390_get_skeys(kvm, &args);
1986 case KVM_S390_SET_SKEYS: {
1987 struct kvm_s390_skeys args;
1990 if (copy_from_user(&args, argp,
1991 sizeof(struct kvm_s390_skeys)))
1993 r = kvm_s390_set_skeys(kvm, &args);
1996 case KVM_S390_GET_CMMA_BITS: {
1997 struct kvm_s390_cmma_log args;
2000 if (copy_from_user(&args, argp, sizeof(args)))
2002 mutex_lock(&kvm->slots_lock);
2003 r = kvm_s390_get_cmma_bits(kvm, &args);
2004 mutex_unlock(&kvm->slots_lock);
2006 r = copy_to_user(argp, &args, sizeof(args));
2012 case KVM_S390_SET_CMMA_BITS: {
2013 struct kvm_s390_cmma_log args;
2016 if (copy_from_user(&args, argp, sizeof(args)))
2018 mutex_lock(&kvm->slots_lock);
2019 r = kvm_s390_set_cmma_bits(kvm, &args);
2020 mutex_unlock(&kvm->slots_lock);
static int kvm_s390_apxa_installed(void)
{
	struct ap_config_info info;

	if (ap_instructions_available()) {
		if (ap_qci(&info) == 0)
			return info.apxa;
	}

	return 0;
}
/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility are installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed.
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
			       unsigned long *aqm, unsigned long *adm)
{
	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;

	mutex_lock(&kvm->lock);
	kvm_s390_vcpu_block_all(kvm);

	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
		memcpy(crycb->apcb1.apm, apm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
			 apm[0], apm[1], apm[2], apm[3]);
		memcpy(crycb->apcb1.aqm, aqm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
			 aqm[0], aqm[1], aqm[2], aqm[3]);
		memcpy(crycb->apcb1.adm, adm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
			 adm[0], adm[1], adm[2], adm[3]);
		break;
	case CRYCB_FORMAT1:
	case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
		memcpy(crycb->apcb0.apm, apm, 8);
		memcpy(crycb->apcb0.aqm, aqm, 2);
		memcpy(crycb->apcb0.adm, adm, 2);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
			 apm[0], *((unsigned short *)aqm),
			 *((unsigned short *)adm));
		break;
	default:	/* Cannot happen */
		break;
	}

	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
	mutex_lock(&kvm->lock);
	kvm_s390_vcpu_block_all(kvm);

	memset(&kvm->arch.crypto.crycb->apcb0, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb0));
	memset(&kvm->arch.crypto.crycb->apcb1, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb1));

	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	if (!test_kvm_facility(kvm, 76))
		return;

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
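
/*
 * The system control area (SCA) holds one entry per vCPU that SIE uses
 * for inter-CPU signalling. A VM starts out with the basic SCA (bsca)
 * and is switched to the extended SCA (esca) once it needs more vCPUs
 * than the basic format can address (see sca_switch_to_extended());
 * the mcn bitmap tracks which entries are valid.
 */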
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
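
/*
 * Decide whether a vCPU id can still be backed by an SCA entry; if the
 * basic SCA is too small and the machine provides ESCA support, switch
 * the VM over to the extended format on demand.
 */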
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
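
/*
 * CPU timer accounting: the guest CPU timer also has to run down for
 * time spent in the host on behalf of the guest. While accounting is
 * enabled, cputm_start caches the TOD clock value at which the current
 * accounting period began and the elapsed delta is subtracted from the
 * SIE block's cputm lazily. cputm_seqcount allows other threads to
 * read a consistent timer value without stopping this vCPU.
 */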
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}

/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
					CR0_INTERRUPT_KEY_SUBMASK |
					CR0_MEASUREMENT_ALERT_SUBMASK;
	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
					CR14_UNUSED_33 |
					CR14_EXTERNAL_DAMAGE_SUBMASK;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	/*
	 * If the AP instructions are not being interpreted and the MSAX3
	 * facility is not configured for the guest, there is nothing to set up.
	 */
	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
	vcpu->arch.sie_block->eca &= ~ECA_APIE;

	if (vcpu->kvm->arch.crypto.apie)
		vcpu->arch.sie_block->eca |= ECA_APIE;

	/* Set up protected key support */
	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
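
/*
 * CMMA (collaborative memory management assist) lets SIE interpret the
 * guest's ESSA instruction; cbrlo points to a zeroed page in which SIE
 * collects the guest-absolute addresses it handled so that KVM can
 * post-process them.
 */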
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->prog20) &
	       (PROG_BLOCK_SIE | PROG_REQUEST);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
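
/*
 * Notifier for host mapping invalidations in the guest address space.
 * Only invalidations that hit a vCPU's prefix area (two pages starting
 * at its prefix) are of interest; affected vCPUs get a MMU_RELOAD
 * request so the ipte notifier is re-armed before they re-enter SIE.
 */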
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
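
/*
 * The guest TOD is kept as an offset (epoch) against the host TOD. With
 * the multiple-epoch facility (139) the extended epoch index (epdx) has
 * to borrow one whenever the epoch subtraction wraps around.
 */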
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
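
/*
 * Post-process one SIE exit: a negative exit_reason reports a host-side
 * event (-EINTR: machine check while in SIE, -EFAULT: host page fault),
 * while a positive icptcode is a SIE intercept that is either handled
 * in the kernel or forwarded to userspace via KVM_EXIT_S390_SIEIC.
 */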
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
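
/*
 * Load guest register state from kvm_run before entering SIE. Floating
 * point/vector, access and guarded storage registers are switched
 * lazily: the host values are parked in vcpu->arch and the guest values
 * become the current thread's, to be swapped back in store_regs().
 */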
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */

	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
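
/*
 * IBS is a SIE assist that speeds up a guest with exactly one started
 * vCPU; it has to be revoked on all vCPUs as soon as a second one is
 * started (see kvm_s390_vcpu_start() below).
 */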
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
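
/*
 * Back end for the KVM_S390_MEM_OP vCPU ioctl: read or write guest
 * logical memory through a bounce buffer, or, with
 * KVM_S390_MEMOP_F_CHECK_ONLY, only verify that the access would be
 * allowed. Transfers are capped at MEM_OP_MAX_SIZE.
 */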
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
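
/*
 * The interrupt injection ioctls above may be issued asynchronously,
 * i.e. without taking the vcpu mutex; everything below runs through the
 * regular, serialized vCPU ioctl path under vcpu_load().
 */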
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");