// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
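
/*
 * Adjust the guest epoch in a SIE control block after the host TOD clock
 * has jumped by @delta, so that the guest-visible TOD stays unchanged.
 */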
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
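
/*
 * Query whether the PERFORM LOCKED OPERATION function code @nr is
 * available: with the test bit (0x100) set in r0, PLO only reports
 * availability (condition code 0) and ignores its parameter registers.
 */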
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
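
/*
 * Probe the host for the CPU features and crypto subfunctions KVM can
 * offer to guests: PLO function codes, TOD-clock steering and the CPACF
 * query masks, plus the SIE facilities required for nested virtualization.
 */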
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
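
/*
 * Transfer the dirty bits the gmap collected for @memslot into the
 * memslot's dirty bitmap, one guest segment (_PAGE_ENTRIES pages) at a
 * time, bailing out early if a fatal signal is pending.
 */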
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma)
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}

static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
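
/*
 * Return the guest frame number of the next page whose bit is set in the
 * CMMA dirty bitmap (kept in the otherwise unused second half of the
 * memslot dirty bitmap), starting the search at @cur_gfn and wrapping
 * around when above the highest slot.
 */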
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
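
/*
 * Worker for KVM_S390_GET_CMMA_BITS in migration mode: starting at
 * args->start_gfn, copy the CMMA values of dirty pages into @res and clear
 * their bits, stopping at a clean gap larger than KVM_S390_MAX_BIT_DISTANCE,
 * at the end of memory, or once @bufsize values have been stored.
 */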
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

static int kvm_s390_apxa_installed(void)
{
	struct ap_config_info info;

	if (ap_instructions_available()) {
		if (ap_qci(&info) == 0)
			return info.apxa;
	}

	return 0;
}

/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility are installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
	mutex_lock(&kvm->lock);
	kvm_s390_vcpu_block_all(kvm);

	memset(&kvm->arch.crypto.crycb->apcb0, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb0));
	memset(&kvm->arch.crypto.crycb->apcb1, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb1));

	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
	mutex_unlock(&kvm->lock);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
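
/*
 * Set up the CRYCB for a new VM and, if the MSAX3 facility is available,
 * enable AES/DEA key wrapping with freshly generated wrapping key masks.
 */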
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	if (!test_kvm_facility(kvm, 76))
		return;

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}

bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}

/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
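
/*
 * Make a vcpu known in its VM's SCA: store the address of its SIE control
 * block in the SCA entry and mark the entry valid in the mcn bitmap.
 */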
2298 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2300 if (!kvm_s390_use_sca_entries()) {
2301 struct bsca_block *sca = vcpu->kvm->arch.sca;
2303 /* we still need the basic sca for the ipte control */
2304 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2305 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2308 read_lock(&vcpu->kvm->arch.sca_lock);
2309 if (vcpu->kvm->arch.use_esca) {
2310 struct esca_block *sca = vcpu->kvm->arch.sca;
2312 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2313 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2314 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2315 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2316 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2317 } else {
2318 struct bsca_block *sca = vcpu->kvm->arch.sca;
2320 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2321 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2322 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2323 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2325 read_unlock(&vcpu->kvm->arch.sca_lock);
2328 /* Basic SCA to Extended SCA data copy routines */
2329 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2332 d->sigp_ctrl.c = s->sigp_ctrl.c;
2333 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2336 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2340 d->ipte_control = s->ipte_control;
2342 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2343 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2344 }
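/*
 * The switch below is only safe because all VCPUs are blocked first
 * and sca_lock is held for writing: no CPU can be in SIE while the
 * entries are copied and scaoh/scaol are repointed to the new ESCA.
 */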
2346 static int sca_switch_to_extended(struct kvm *kvm)
2348 struct bsca_block *old_sca = kvm->arch.sca;
2349 struct esca_block *new_sca;
2350 struct kvm_vcpu *vcpu;
2351 unsigned int vcpu_idx;
2352 u32 scaoh, scaol;
2354 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2355 if (!new_sca)
2356 return -ENOMEM;
2358 scaoh = (u32)((u64)(new_sca) >> 32);
2359 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2361 kvm_s390_vcpu_block_all(kvm);
2362 write_lock(&kvm->arch.sca_lock);
2364 sca_copy_b_to_e(new_sca, old_sca);
2366 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2367 vcpu->arch.sie_block->scaoh = scaoh;
2368 vcpu->arch.sie_block->scaol = scaol;
2369 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2371 kvm->arch.sca = new_sca;
2372 kvm->arch.use_esca = 1;
2374 write_unlock(&kvm->arch.sca_lock);
2375 kvm_s390_vcpu_unblock_all(kvm);
2377 free_page((unsigned long)old_sca);
2379 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2380 old_sca, kvm->arch.sca);
2381 return 0;
2382 }
2384 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2388 if (!kvm_s390_use_sca_entries()) {
2389 if (id < KVM_MAX_VCPUS)
2390 return true;
2391 return false;
2392 }
2393 if (id < KVM_S390_BSCA_CPU_SLOTS)
2394 return true;
2395 if (!sclp.has_esca || !sclp.has_64bscao)
2396 return false;
2398 mutex_lock(&kvm->lock);
2399 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2400 mutex_unlock(&kvm->lock);
2402 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2405 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2407 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2408 kvm_clear_async_pf_completion_queue(vcpu);
2409 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2410 KVM_SYNC_GPRS |
2411 KVM_SYNC_ACRS |
2412 KVM_SYNC_CRS |
2413 KVM_SYNC_ARCH0 |
2414 KVM_SYNC_PFAULT;
2415 kvm_s390_set_prefix(vcpu, 0);
2416 if (test_kvm_facility(vcpu->kvm, 64))
2417 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2418 if (test_kvm_facility(vcpu->kvm, 82))
2419 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2420 if (test_kvm_facility(vcpu->kvm, 133))
2421 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2422 if (test_kvm_facility(vcpu->kvm, 156))
2423 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2424 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2425 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2426 */
2427 if (MACHINE_HAS_VX)
2428 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2429 else
2430 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2432 if (kvm_is_ucontrol(vcpu->kvm))
2433 return __kvm_ucontrol_vcpu_init(vcpu);
2435 return 0;
2436 }
2438 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2439 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2441 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2442 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2443 vcpu->arch.cputm_start = get_tod_clock_fast();
2444 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2447 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2448 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2450 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2451 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2452 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2453 vcpu->arch.cputm_start = 0;
2454 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2457 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2458 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2460 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2461 vcpu->arch.cputm_enabled = true;
2462 __start_cpu_timer_accounting(vcpu);
2465 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2466 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2468 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2469 __stop_cpu_timer_accounting(vcpu);
2470 vcpu->arch.cputm_enabled = false;
2473 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2475 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2476 __enable_cpu_timer_accounting(vcpu);
2477 preempt_enable();
2478 }
2480 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2482 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2483 __disable_cpu_timer_accounting(vcpu);
2484 preempt_enable();
2485 }
2487 /* set the cpu timer - may only be called from the VCPU thread itself */
2488 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2490 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2491 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2492 if (vcpu->arch.cputm_enabled)
2493 vcpu->arch.cputm_start = get_tod_clock_fast();
2494 vcpu->arch.sie_block->cputm = cputm;
2495 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2496 preempt_enable();
2497 }
2499 /* update and get the cpu timer - can also be called from other VCPU threads */
2500 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2502 unsigned int seq;
2503 __u64 value;
2505 if (unlikely(!vcpu->arch.cputm_enabled))
2506 return vcpu->arch.sie_block->cputm;
2508 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2509 do {
2510 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2512 * If the writer would ever execute a read in the critical
2513 * section, e.g. in irq context, we have a deadlock.
2515 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2516 value = vcpu->arch.sie_block->cputm;
2517 /* if cputm_start is 0, accounting is being started/stopped */
2518 if (likely(vcpu->arch.cputm_start))
2519 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2520 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2521 preempt_enable();
2522 return value;
2523 }
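/*
 * Worked example for the read loop above: with a stored cputm of 1000
 * and accounting started 300 TOD-clock units ago, the returned value
 * is 1000 - 300 = 700; the seqcount retry guarantees that cputm and
 * cputm_start are never mixed from two different accounting periods.
 */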
2525 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2528 gmap_enable(vcpu->arch.enabled_gmap);
2529 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2530 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2531 __start_cpu_timer_accounting(vcpu);
2535 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2538 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2539 __stop_cpu_timer_accounting(vcpu);
2540 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2541 vcpu->arch.enabled_gmap = gmap_get_enabled();
2542 gmap_disable(vcpu->arch.enabled_gmap);
2544 }
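/*
 * kvm_arch_vcpu_load() and kvm_arch_vcpu_put() mirror each other:
 * load enables the VCPU's gmap and restarts CPU timer accounting,
 * put stops the accounting and remembers the enabled gmap so the
 * next load can re-enable exactly that one.
 */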
2546 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2548 /* this equals initial cpu reset in pop, but we don't switch to ESA */
2549 vcpu->arch.sie_block->gpsw.mask = 0UL;
2550 vcpu->arch.sie_block->gpsw.addr = 0UL;
2551 kvm_s390_set_prefix(vcpu, 0);
2552 kvm_s390_set_cpu_timer(vcpu, 0);
2553 vcpu->arch.sie_block->ckc = 0UL;
2554 vcpu->arch.sie_block->todpr = 0;
2555 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2556 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
2557 CR0_INTERRUPT_KEY_SUBMASK |
2558 CR0_MEASUREMENT_ALERT_SUBMASK;
2559 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2560 CR14_UNUSED_33 |
2561 CR14_EXTERNAL_DAMAGE_SUBMASK;
2562 /* make sure the new fpc will be lazily loaded */
2563 save_fpu_regs();
2564 current->thread.fpu.fpc = 0;
2565 vcpu->arch.sie_block->gbea = 1;
2566 vcpu->arch.sie_block->pp = 0;
2567 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2568 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2569 kvm_clear_async_pf_completion_queue(vcpu);
2570 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2571 kvm_s390_vcpu_stop(vcpu);
2572 kvm_s390_clear_local_irqs(vcpu);
2575 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2577 mutex_lock(&vcpu->kvm->lock);
2578 preempt_disable();
2579 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2580 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2581 preempt_enable();
2582 mutex_unlock(&vcpu->kvm->lock);
2583 if (!kvm_is_ucontrol(vcpu->kvm)) {
2584 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2585 sca_add_vcpu(vcpu);
2586 }
2587 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2588 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2589 /* make vcpu_load load the right gmap on the first trigger */
2590 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2593 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2596 * If the AP instructions are not being interpreted and the MSAX3
2597 * facility is not configured for the guest, there is nothing to set up.
2599 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2600 return;
2602 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2603 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2605 if (vcpu->kvm->arch.crypto.apie)
2606 vcpu->arch.sie_block->eca |= ECA_APIE;
2608 /* Set up protected key support */
2609 if (vcpu->kvm->arch.crypto.aes_kw)
2610 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2611 if (vcpu->kvm->arch.crypto.dea_kw)
2612 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2615 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2617 free_page(vcpu->arch.sie_block->cbrlo);
2618 vcpu->arch.sie_block->cbrlo = 0;
2621 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2623 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2624 if (!vcpu->arch.sie_block->cbrlo)
2625 return -ENOMEM;
2626 return 0;
2627 }
2629 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2631 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2633 vcpu->arch.sie_block->ibc = model->ibc;
2634 if (test_kvm_facility(vcpu->kvm, 7))
2635 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2638 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2642 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2643 CPUSTAT_SM |
2644 CPUSTAT_STOPPED);
2646 if (test_kvm_facility(vcpu->kvm, 78))
2647 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2648 else if (test_kvm_facility(vcpu->kvm, 8))
2649 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2651 kvm_s390_vcpu_setup_model(vcpu);
2653 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2654 if (MACHINE_HAS_ESOP)
2655 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2656 if (test_kvm_facility(vcpu->kvm, 9))
2657 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2658 if (test_kvm_facility(vcpu->kvm, 73))
2659 vcpu->arch.sie_block->ecb |= ECB_TE;
2661 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2662 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2663 if (test_kvm_facility(vcpu->kvm, 130))
2664 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2665 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2666 if (sclp.has_cei)
2667 vcpu->arch.sie_block->eca |= ECA_CEI;
2668 if (sclp.has_ib)
2669 vcpu->arch.sie_block->eca |= ECA_IB;
2670 if (sclp.has_siif)
2671 vcpu->arch.sie_block->eca |= ECA_SII;
2672 if (sclp.has_sigpif)
2673 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2674 if (test_kvm_facility(vcpu->kvm, 129)) {
2675 vcpu->arch.sie_block->eca |= ECA_VX;
2676 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2677 }
2678 if (test_kvm_facility(vcpu->kvm, 139))
2679 vcpu->arch.sie_block->ecd |= ECD_MEF;
2680 if (test_kvm_facility(vcpu->kvm, 156))
2681 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2682 if (vcpu->arch.sie_block->gd) {
2683 vcpu->arch.sie_block->eca |= ECA_AIV;
2684 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2685 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2686 }
2687 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2688 | SDNXC;
2689 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2691 if (sclp.has_kss)
2692 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2693 else
2694 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2696 if (vcpu->kvm->arch.use_cmma) {
2697 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2698 if (rc)
2699 return rc;
2700 }
2701 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2702 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2704 kvm_s390_vcpu_crypto_setup(vcpu);
2706 return rc;
2707 }
2709 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2710 unsigned int id)
2712 struct kvm_vcpu *vcpu;
2713 struct sie_page *sie_page;
2714 int rc = -EINVAL;
2716 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2717 goto out;
2719 rc = -ENOMEM;
2721 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2722 if (!vcpu)
2723 goto out;
2725 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2726 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2727 if (!sie_page)
2728 goto out_free_cpu;
2730 vcpu->arch.sie_block = &sie_page->sie_block;
2731 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2733 /* the real guest size will always be smaller than msl */
2734 vcpu->arch.sie_block->mso = 0;
2735 vcpu->arch.sie_block->msl = sclp.hamax;
2737 vcpu->arch.sie_block->icpua = id;
2738 spin_lock_init(&vcpu->arch.local_int.lock);
2739 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2740 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2741 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2742 seqcount_init(&vcpu->arch.cputm_seqcount);
2744 rc = kvm_vcpu_init(vcpu, kvm, id);
2745 if (rc)
2746 goto out_free_sie_block;
2747 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2748 vcpu->arch.sie_block);
2749 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2750 return vcpu;
2752 out_free_sie_block:
2753 free_page((unsigned long)(vcpu->arch.sie_block));
2754 out_free_cpu:
2755 kmem_cache_free(kvm_vcpu_cache, vcpu);
2756 out:
2757 return ERR_PTR(rc);
2758 }
2760 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2762 return kvm_s390_vcpu_has_irq(vcpu, 0);
2765 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2767 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2770 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2772 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2773 exit_sie(vcpu);
2774 }
2776 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2778 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2781 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2783 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2784 exit_sie(vcpu);
2785 }
2787 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
2789 return atomic_read(&vcpu->arch.sie_block->prog20) &
2790 (PROG_BLOCK_SIE | PROG_REQUEST);
2793 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2795 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2799 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
2800 * If the CPU is not running (e.g. waiting as idle) the function will
2801 * return immediately. */
2802 void exit_sie(struct kvm_vcpu *vcpu)
2804 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2805 kvm_s390_vsie_kick(vcpu);
2806 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2807 cpu_relax();
2808 }
2810 /* Kick a guest cpu out of SIE to process a request synchronously */
2811 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2813 kvm_make_request(req, vcpu);
2814 kvm_s390_vcpu_request(vcpu);
2815 }
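/*
 * kvm_s390_sync_request() implements the request+kick pattern: the
 * request is queued first, then kvm_s390_vcpu_request() forces the
 * target CPU out of (v)SIE so vcpu_pre_run() processes the request
 * before the guest is re-entered.
 */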
2817 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2818 unsigned long end)
2820 struct kvm *kvm = gmap->private;
2821 struct kvm_vcpu *vcpu;
2822 unsigned long prefix;
2823 int i;
2825 if (gmap_is_shadow(gmap))
2826 return;
2827 if (start >= 1UL << 31)
2828 /* We are only interested in prefix pages */
2829 return;
2830 kvm_for_each_vcpu(i, vcpu, kvm) {
2831 /* match against both prefix pages */
2832 prefix = kvm_s390_get_prefix(vcpu);
2833 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2834 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2835 start, end);
2836 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2837 }
2838 }
2839 }
2841 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2843 /* kvm common code refers to this, but never calls it */
2844 BUG();
2845 return 0;
2846 }
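/*
 * The two ONE_REG handlers below move a single register between the
 * SIE block and userspace: reg->id selects the register and reg->addr
 * is a user pointer accessed with put_user()/get_user().
 */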
2848 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2849 struct kvm_one_reg *reg)
2850 {
2851 int r = -EINVAL;
2853 switch (reg->id) {
2854 case KVM_REG_S390_TODPR:
2855 r = put_user(vcpu->arch.sie_block->todpr,
2856 (u32 __user *)reg->addr);
2858 case KVM_REG_S390_EPOCHDIFF:
2859 r = put_user(vcpu->arch.sie_block->epoch,
2860 (u64 __user *)reg->addr);
2862 case KVM_REG_S390_CPU_TIMER:
2863 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2864 (u64 __user *)reg->addr);
2866 case KVM_REG_S390_CLOCK_COMP:
2867 r = put_user(vcpu->arch.sie_block->ckc,
2868 (u64 __user *)reg->addr);
2870 case KVM_REG_S390_PFTOKEN:
2871 r = put_user(vcpu->arch.pfault_token,
2872 (u64 __user *)reg->addr);
2874 case KVM_REG_S390_PFCOMPARE:
2875 r = put_user(vcpu->arch.pfault_compare,
2876 (u64 __user *)reg->addr);
2878 case KVM_REG_S390_PFSELECT:
2879 r = put_user(vcpu->arch.pfault_select,
2880 (u64 __user *)reg->addr);
2882 case KVM_REG_S390_PP:
2883 r = put_user(vcpu->arch.sie_block->pp,
2884 (u64 __user *)reg->addr);
2886 case KVM_REG_S390_GBEA:
2887 r = put_user(vcpu->arch.sie_block->gbea,
2888 (u64 __user *)reg->addr);
2889 break;
2890 default:
2891 break;
2892 }
2894 return r;
2895 }
2897 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2898 struct kvm_one_reg *reg)
2899 {
2900 int r = -EINVAL;
2901 __u64 val;
2903 switch (reg->id) {
2904 case KVM_REG_S390_TODPR:
2905 r = get_user(vcpu->arch.sie_block->todpr,
2906 (u32 __user *)reg->addr);
2908 case KVM_REG_S390_EPOCHDIFF:
2909 r = get_user(vcpu->arch.sie_block->epoch,
2910 (u64 __user *)reg->addr);
2912 case KVM_REG_S390_CPU_TIMER:
2913 r = get_user(val, (u64 __user *)reg->addr);
2914 if (r == 0)
2915 kvm_s390_set_cpu_timer(vcpu, val);
2917 case KVM_REG_S390_CLOCK_COMP:
2918 r = get_user(vcpu->arch.sie_block->ckc,
2919 (u64 __user *)reg->addr);
2921 case KVM_REG_S390_PFTOKEN:
2922 r = get_user(vcpu->arch.pfault_token,
2923 (u64 __user *)reg->addr);
2924 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2925 kvm_clear_async_pf_completion_queue(vcpu);
2927 case KVM_REG_S390_PFCOMPARE:
2928 r = get_user(vcpu->arch.pfault_compare,
2929 (u64 __user *)reg->addr);
2931 case KVM_REG_S390_PFSELECT:
2932 r = get_user(vcpu->arch.pfault_select,
2933 (u64 __user *)reg->addr);
2935 case KVM_REG_S390_PP:
2936 r = get_user(vcpu->arch.sie_block->pp,
2937 (u64 __user *)reg->addr);
2939 case KVM_REG_S390_GBEA:
2940 r = get_user(vcpu->arch.sie_block->gbea,
2941 (u64 __user *)reg->addr);
2942 break;
2943 default:
2944 break;
2945 }
2947 return r;
2948 }
2950 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2952 kvm_s390_vcpu_initial_reset(vcpu);
2953 return 0;
2954 }
2956 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2958 vcpu_load(vcpu);
2959 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2960 vcpu_put(vcpu);
2961 return 0;
2962 }
2964 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2966 vcpu_load(vcpu);
2967 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2968 vcpu_put(vcpu);
2969 return 0;
2970 }
2972 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2973 struct kvm_sregs *sregs)
2975 vcpu_load(vcpu);
2977 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2978 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2980 vcpu_put(vcpu);
2981 return 0;
2982 }
2984 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2985 struct kvm_sregs *sregs)
2987 vcpu_load(vcpu);
2989 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2990 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2992 vcpu_put(vcpu);
2993 return 0;
2994 }
2996 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2997 {
2998 int ret = 0;
3000 vcpu_load(vcpu);
3002 if (test_fp_ctl(fpu->fpc)) {
3003 ret = -EINVAL;
3004 goto out;
3005 }
3006 vcpu->run->s.regs.fpc = fpu->fpc;
3007 if (MACHINE_HAS_VX)
3008 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3009 (freg_t *) fpu->fprs);
3010 else
3011 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3013 out:
3014 vcpu_put(vcpu);
3015 return ret;
3016 }
3018 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3019 {
3020 vcpu_load(vcpu);
3022 /* make sure we have the latest values */
3023 save_fpu_regs();
3024 if (MACHINE_HAS_VX)
3025 convert_vx_to_fp((freg_t *) fpu->fprs,
3026 (__vector128 *) vcpu->run->s.regs.vrs);
3027 else
3028 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3029 fpu->fpc = vcpu->run->s.regs.fpc;
3031 vcpu_put(vcpu);
3032 return 0;
3033 }
3035 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3037 int rc = 0;
3039 if (!is_vcpu_stopped(vcpu))
3040 rc = -EBUSY;
3041 else {
3042 vcpu->run->psw_mask = psw.mask;
3043 vcpu->run->psw_addr = psw.addr;
3044 }
3045 return rc;
3046 }
3048 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3049 struct kvm_translation *tr)
3051 return -EINVAL; /* not implemented yet */
3054 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3055 KVM_GUESTDBG_USE_HW_BP | \
3056 KVM_GUESTDBG_ENABLE)
3058 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3059 struct kvm_guest_debug *dbg)
3060 {
3061 int rc = 0;
3063 vcpu_load(vcpu);
3065 vcpu->guest_debug = 0;
3066 kvm_s390_clear_bp_data(vcpu);
3068 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3069 rc = -EINVAL;
3070 goto out;
3071 }
3072 if (!sclp.has_gpere) {
3073 rc = -EINVAL;
3074 goto out;
3075 }
3077 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3078 vcpu->guest_debug = dbg->control;
3079 /* enforce guest PER */
3080 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3082 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3083 rc = kvm_s390_import_bp_data(vcpu, dbg);
3084 } else {
3085 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3086 vcpu->arch.guestdbg.last_bp = 0;
3087 }
3089 if (rc) {
3090 vcpu->guest_debug = 0;
3091 kvm_s390_clear_bp_data(vcpu);
3092 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3093 }
3095 out:
3096 vcpu_put(vcpu);
3097 return rc;
3098 }
3100 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3101 struct kvm_mp_state *mp_state)
3102 {
3103 int ret;
3105 vcpu_load(vcpu);
3107 /* CHECK_STOP and LOAD are not supported yet */
3108 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3109 KVM_MP_STATE_OPERATING;
3111 vcpu_put(vcpu);
3112 return ret;
3113 }
3115 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3116 struct kvm_mp_state *mp_state)
3117 {
3118 int rc = 0;
3120 vcpu_load(vcpu);
3122 /* user space knows about this interface - let it control the state */
3123 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3125 switch (mp_state->mp_state) {
3126 case KVM_MP_STATE_STOPPED:
3127 kvm_s390_vcpu_stop(vcpu);
3128 break;
3129 case KVM_MP_STATE_OPERATING:
3130 kvm_s390_vcpu_start(vcpu);
3131 break;
3132 case KVM_MP_STATE_LOAD:
3133 case KVM_MP_STATE_CHECK_STOP:
3134 /* fall through - CHECK_STOP and LOAD are not supported yet */
3135 default:
3136 rc = -ENXIO;
3137 }
3139 vcpu_put(vcpu);
3140 return rc;
3141 }
3143 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3145 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3148 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3149 {
3150 retry:
3151 kvm_s390_vcpu_request_handled(vcpu);
3152 if (!kvm_request_pending(vcpu))
3153 return 0;
3155 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3156 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3157 * This ensures that the ipte instruction for this request has
3158 * already finished. We might race against a second unmapper that
3159 * wants to set the blocking bit. Let's just retry the request loop.
3160 */
3161 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3162 int rc;
3163 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3164 kvm_s390_get_prefix(vcpu),
3165 PAGE_SIZE * 2, PROT_WRITE);
3166 if (rc) {
3167 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3168 return rc;
3169 }
3170 goto retry;
3171 }
3173 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3174 vcpu->arch.sie_block->ihcpu = 0xffff;
3175 goto retry;
3176 }
3178 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3179 if (!ibs_enabled(vcpu)) {
3180 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3181 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3182 }
3183 goto retry;
3184 }
3186 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3187 if (ibs_enabled(vcpu)) {
3188 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3189 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3190 }
3191 goto retry;
3192 }
3194 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3195 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3196 goto retry;
3197 }
3199 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3201 * Disable CMM virtualization; we will emulate the ESSA
3202 * instruction manually, in order to provide additional
3203 * functionalities needed for live migration.
3205 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3206 goto retry;
3207 }
3209 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3211 * Re-enable CMM virtualization if CMMA is available and
3212 * CMM has been used.
3214 if ((vcpu->kvm->arch.use_cmma) &&
3215 (vcpu->kvm->mm->context.uses_cmm))
3216 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3217 goto retry;
3218 }
3220 /* nothing to do, just clear the request */
3221 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3222 /* we left the vsie handler, nothing to do, just clear the request */
3223 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3225 return 0;
3226 }
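/*
 * The guest epoch set below is the offset between the requested guest
 * TOD and the current host TOD: epoch = gtod->tod - htod.tod. With
 * the multiple-epoch facility (139) the epoch index is adjusted too,
 * borrowing one when the subtraction wraps.
 */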
3228 void kvm_s390_set_tod_clock(struct kvm *kvm,
3229 const struct kvm_s390_vm_tod_clock *gtod)
3231 struct kvm_vcpu *vcpu;
3232 struct kvm_s390_tod_clock_ext htod;
3233 int i;
3235 mutex_lock(&kvm->lock);
3236 preempt_disable();
3238 get_tod_clock_ext((char *)&htod);
3240 kvm->arch.epoch = gtod->tod - htod.tod;
3241 kvm->arch.epdx = 0;
3242 if (test_kvm_facility(kvm, 139)) {
3243 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3244 if (kvm->arch.epoch > gtod->tod)
3245 kvm->arch.epdx -= 1;
3246 }
3248 kvm_s390_vcpu_block_all(kvm);
3249 kvm_for_each_vcpu(i, vcpu, kvm) {
3250 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3251 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3252 }
3254 kvm_s390_vcpu_unblock_all(kvm);
3255 preempt_enable();
3256 mutex_unlock(&kvm->lock);
3257 }
3260 * kvm_arch_fault_in_page - fault-in guest page if necessary
3261 * @vcpu: The corresponding virtual cpu
3262 * @gpa: Guest physical address
3263 * @writable: Whether the page should be writable or not
3265 * Make sure that a guest page has been faulted-in on the host.
3267 * Return: Zero on success, negative error code otherwise.
3269 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3271 return gmap_fault(vcpu->arch.gmap, gpa,
3272 writable ? FAULT_FLAG_WRITE : 0);
3275 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3276 unsigned long token)
3278 struct kvm_s390_interrupt inti;
3279 struct kvm_s390_irq irq;
3281 if (start_token) {
3282 irq.u.ext.ext_params2 = token;
3283 irq.type = KVM_S390_INT_PFAULT_INIT;
3284 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3285 } else {
3286 inti.type = KVM_S390_INT_PFAULT_DONE;
3287 inti.parm64 = token;
3288 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3289 }
3290 }
3292 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3293 struct kvm_async_pf *work)
3295 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3296 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3299 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3300 struct kvm_async_pf *work)
3302 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3303 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3306 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3307 struct kvm_async_pf *work)
3309 /* s390 will always inject the page directly */
3312 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3315 * s390 will always inject the page directly,
3316 * but we still want check_async_completion to clean up
3317 */
3318 return true;
3319 }
3321 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3323 hva_t hva;
3324 struct kvm_arch_async_pf arch;
3325 int rc;
3327 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3328 return 0;
3329 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3330 vcpu->arch.pfault_compare)
3331 return 0;
3332 if (psw_extint_disabled(vcpu))
3333 return 0;
3334 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3335 return 0;
3336 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3337 return 0;
3338 if (!vcpu->arch.gmap->pfault_enabled)
3339 return 0;
3341 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3342 hva += current->thread.gmap_addr & ~PAGE_MASK;
3343 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3344 return 0;
3346 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3347 return rc;
3348 }
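/*
 * Summary of the gates above: an async pfault is only set up when the
 * guest registered a valid token, the PSW matches the pfault
 * compare/select mask, external interrupts and the service-signal
 * subclass are enabled, and pfault is enabled on the gmap.
 */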
3350 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3355 * On s390 notifications for arriving pages will be delivered directly
3356 * to the guest but the housekeeping for completed pfaults is
3357 * handled outside the worker.
3359 kvm_check_async_pf_completion(vcpu);
3361 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3362 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3364 if (need_resched())
3365 schedule();
3367 if (test_cpu_flag(CIF_MCCK_PENDING))
3368 s390_handle_mcck();
3370 if (!kvm_is_ucontrol(vcpu->kvm)) {
3371 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3372 if (rc)
3373 return rc;
3374 }
3376 rc = kvm_s390_handle_requests(vcpu);
3377 if (rc)
3378 return rc;
3380 if (guestdbg_enabled(vcpu)) {
3381 kvm_s390_backup_guest_per_regs(vcpu);
3382 kvm_s390_patch_guest_per_regs(vcpu);
3385 vcpu->arch.sie_block->icptcode = 0;
3386 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3387 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3388 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3390 return 0;
3391 }
3393 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3395 struct kvm_s390_pgm_info pgm_info = {
3396 .code = PGM_ADDRESSING,
3397 };
3398 u8 opcode, ilen;
3399 int rc;
3401 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3402 trace_kvm_s390_sie_fault(vcpu);
3405 * We want to inject an addressing exception, which is defined as a
3406 * suppressing or terminating exception. However, since we came here
3407 * by a DAT access exception, the PSW still points to the faulting
3408 * instruction since DAT exceptions are nullifying. So we've got
3409 * to look up the current opcode to get the length of the instruction
3410 * to be able to forward the PSW.
3412 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3413 ilen = insn_length(opcode);
3414 if (rc < 0) {
3415 return rc;
3416 } else if (rc) {
3417 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3418 * Forward by arbitrary ilc, injection will take care of
3419 * nullification if necessary.
3420 */
3421 pgm_info = vcpu->arch.pgm;
3422 ilen = 4;
3423 }
3424 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3425 kvm_s390_forward_psw(vcpu, ilen);
3426 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3429 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3431 struct mcck_volatile_info *mcck_info;
3432 struct sie_page *sie_page;
3434 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3435 vcpu->arch.sie_block->icptcode);
3436 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3438 if (guestdbg_enabled(vcpu))
3439 kvm_s390_restore_guest_per_regs(vcpu);
3441 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3442 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3444 if (exit_reason == -EINTR) {
3445 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3446 sie_page = container_of(vcpu->arch.sie_block,
3447 struct sie_page, sie_block);
3448 mcck_info = &sie_page->mcck_info;
3449 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3450 return 0;
3451 }
3453 if (vcpu->arch.sie_block->icptcode > 0) {
3454 int rc = kvm_handle_sie_intercept(vcpu);
3456 if (rc != -EOPNOTSUPP)
3457 return rc;
3458 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3459 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3460 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3461 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3462 return -EREMOTE;
3463 } else if (exit_reason != -EFAULT) {
3464 vcpu->stat.exit_null++;
3465 return 0;
3466 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3467 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3468 vcpu->run->s390_ucontrol.trans_exc_code =
3469 current->thread.gmap_addr;
3470 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3471 return -EREMOTE;
3472 } else if (current->thread.gmap_pfault) {
3473 trace_kvm_s390_major_guest_pfault(vcpu);
3474 current->thread.gmap_pfault = 0;
3475 if (kvm_arch_setup_async_pf(vcpu))
3476 return 0;
3477 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3478 }
3479 return vcpu_post_run_fault_in_sie(vcpu);
3480 }
3482 static int __vcpu_run(struct kvm_vcpu *vcpu)
3484 int rc, exit_reason;
3487 * We try to hold kvm->srcu during most of vcpu_run (except when run-
3488 * ning the guest), so that memslots (and other stuff) are protected
3489 */
3490 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3492 do {
3493 rc = vcpu_pre_run(vcpu);
3494 if (rc)
3495 break;
3497 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3499 * As PF_VCPU will be used in fault handler, between
3500 * guest_enter and guest_exit should be no uaccess.
3502 local_irq_disable();
3503 guest_enter_irqoff();
3504 __disable_cpu_timer_accounting(vcpu);
3505 local_irq_enable();
3506 exit_reason = sie64a(vcpu->arch.sie_block,
3507 vcpu->run->s.regs.gprs);
3508 local_irq_disable();
3509 __enable_cpu_timer_accounting(vcpu);
3510 guest_exit_irqoff();
3511 local_irq_enable();
3512 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3514 rc = vcpu_post_run(vcpu, exit_reason);
3515 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3517 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3518 return rc;
3519 }
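/*
 * One iteration of the loop above: vcpu_pre_run() delivers pending
 * interrupts and handles requests, sie64a() runs the guest with
 * interrupts disabled and guest-context accounting active, and
 * vcpu_post_run() turns the SIE exit into a return code. The loop
 * ends on a pending signal, a guestdbg exit or a non-zero rc.
 */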
3521 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3523 struct runtime_instr_cb *riccb;
3524 struct gs_cb *gscb;
3526 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3527 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3528 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3529 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3530 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3531 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3532 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3533 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3534 /* some control register changes require a tlb flush */
3535 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3537 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3538 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3539 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3540 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3541 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3542 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3544 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3545 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3546 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3547 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3548 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3549 kvm_clear_async_pf_completion_queue(vcpu);
3552 * If userspace sets the riccb (e.g. after migration) to a valid state,
3553 * we should enable RI here instead of doing the lazy enablement.
3555 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3556 test_kvm_facility(vcpu->kvm, 64) &&
3557 riccb->v &&
3558 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3559 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3560 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3561 }
3563 * If userspace sets the gscb (e.g. after migration) to non-zero,
3564 * we should enable GS here instead of doing the lazy enablement.
3566 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3567 test_kvm_facility(vcpu->kvm, 133) &&
3568 gscb->gssm &&
3569 !vcpu->arch.gs_enabled) {
3570 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3571 vcpu->arch.sie_block->ecb |= ECB_GS;
3572 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3573 vcpu->arch.gs_enabled = 1;
3574 }
3575 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3576 test_kvm_facility(vcpu->kvm, 82)) {
3577 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3578 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3579 }
3580 save_access_regs(vcpu->arch.host_acrs);
3581 restore_access_regs(vcpu->run->s.regs.acrs);
3582 /* save host (userspace) fprs/vrs */
3583 save_fpu_regs();
3584 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3585 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3586 if (MACHINE_HAS_VX)
3587 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3588 else
3589 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3590 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3591 if (test_fp_ctl(current->thread.fpu.fpc))
3592 /* User space provided an invalid FPC, let's clear it */
3593 current->thread.fpu.fpc = 0;
3594 if (MACHINE_HAS_GS) {
3595 preempt_disable();
3596 __ctl_set_bit(2, 4);
3597 if (current->thread.gs_cb) {
3598 vcpu->arch.host_gscb = current->thread.gs_cb;
3599 save_gs_cb(vcpu->arch.host_gscb);
3600 }
3601 if (vcpu->arch.gs_enabled) {
3602 current->thread.gs_cb = (struct gs_cb *)
3603 &vcpu->run->s.regs.gscb;
3604 restore_gs_cb(current->thread.gs_cb);
3605 }
3606 preempt_enable();
3607 }
3608 /* SIE will load etoken directly from SDNX and therefore kvm_run */
3610 kvm_run->kvm_dirty_regs = 0;
3611 }
3613 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3615 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3616 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3617 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3618 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3619 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3620 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3621 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3622 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3623 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3624 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3625 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3626 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3627 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3628 save_access_regs(vcpu->run->s.regs.acrs);
3629 restore_access_regs(vcpu->arch.host_acrs);
3630 /* Save guest register state */
3631 save_fpu_regs();
3632 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3633 /* Restore will be done lazily at return */
3634 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3635 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3636 if (MACHINE_HAS_GS) {
3637 __ctl_set_bit(2, 4);
3638 if (vcpu->arch.gs_enabled)
3639 save_gs_cb(current->thread.gs_cb);
3640 preempt_disable();
3641 current->thread.gs_cb = vcpu->arch.host_gscb;
3642 restore_gs_cb(vcpu->arch.host_gscb);
3643 preempt_enable();
3644 if (!vcpu->arch.host_gscb)
3645 __ctl_clear_bit(2, 4);
3646 vcpu->arch.host_gscb = NULL;
3647 }
3648 /* SIE will save etoken directly into SDNX and therefore kvm_run */
3649 }
3651 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3653 int rc;
3655 if (kvm_run->immediate_exit)
3656 return -EINTR;
3658 vcpu_load(vcpu);
3660 if (guestdbg_exit_pending(vcpu)) {
3661 kvm_s390_prepare_debug_exit(vcpu);
3662 rc = 0;
3663 goto out;
3664 }
3666 kvm_sigset_activate(vcpu);
3668 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3669 kvm_s390_vcpu_start(vcpu);
3670 } else if (is_vcpu_stopped(vcpu)) {
3671 pr_err_ratelimited("can't run stopped vcpu %d\n",
3672 vcpu->vcpu_id);
3673 rc = -EINVAL;
3674 goto out;
3675 }
3677 sync_regs(vcpu, kvm_run);
3678 enable_cpu_timer_accounting(vcpu);
3680 might_fault();
3681 rc = __vcpu_run(vcpu);
3683 if (signal_pending(current) && !rc) {
3684 kvm_run->exit_reason = KVM_EXIT_INTR;
3685 rc = -EINTR;
3686 }
3688 if (guestdbg_exit_pending(vcpu) && !rc) {
3689 kvm_s390_prepare_debug_exit(vcpu);
3690 rc = 0;
3691 }
3693 if (rc == -EREMOTE) {
3694 /* userspace support is needed, kvm_run has been prepared */
3695 rc = 0;
3696 }
3698 disable_cpu_timer_accounting(vcpu);
3699 store_regs(vcpu, kvm_run);
3701 kvm_sigset_deactivate(vcpu);
3703 vcpu->stat.exit_userspace++;
3704 out:
3705 vcpu_put(vcpu);
3706 return rc;
3707 }
3710 * store status at address
3711 * we have two special cases:
3712 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3713 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3715 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3717 unsigned char archmode = 1;
3718 freg_t fprs[NUM_FPRS];
3719 unsigned int px;
3720 u64 clkcomp, cputm;
3721 int rc;
3723 px = kvm_s390_get_prefix(vcpu);
3724 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3725 if (write_guest_abs(vcpu, 163, &archmode, 1))
3726 return -EFAULT;
3727 gpa = 0;
3728 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3729 if (write_guest_real(vcpu, 163, &archmode, 1))
3730 return -EFAULT;
3731 gpa = px;
3732 } else
3733 gpa -= __LC_FPREGS_SAVE_AREA;
3735 /* manually convert vector registers if necessary */
3736 if (MACHINE_HAS_VX) {
3737 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3738 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3739 fprs, 128);
3740 } else {
3741 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3742 vcpu->run->s.regs.fprs, 128);
3743 }
3744 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3745 vcpu->run->s.regs.gprs, 128);
3746 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3747 &vcpu->arch.sie_block->gpsw, 16);
3748 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3749 &px, 4);
3750 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3751 &vcpu->run->s.regs.fpc, 4);
3752 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3753 &vcpu->arch.sie_block->todpr, 4);
3754 cputm = kvm_s390_get_cpu_timer(vcpu);
3755 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3756 &cputm, 8);
3757 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3758 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3759 &clkcomp, 8);
3760 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3761 &vcpu->run->s.regs.acrs, 64);
3762 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3763 &vcpu->arch.sie_block->gcr, 128);
3764 return rc ? -EFAULT : 0;
3765 }
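/*
 * The writes above follow the architected store-status layout in the
 * prefix page (the __LC_*_SAVE_AREA offsets): FP or converted vector
 * registers, GPRs, PSW, prefix, FP control, TOD programmable
 * register, CPU timer, clock comparator, access and control registers.
 */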
3767 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3770 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3771 * switch in the run ioctl. Let's update our copies before we save
3772 * it into the save area
3773 */
3774 save_fpu_regs();
3775 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3776 save_access_regs(vcpu->run->s.regs.acrs);
3778 return kvm_s390_store_status_unloaded(vcpu, addr);
3779 }
3781 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3783 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3784 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3787 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3790 struct kvm_vcpu *vcpu;
3792 kvm_for_each_vcpu(i, vcpu, kvm) {
3793 __disable_ibs_on_vcpu(vcpu);
3797 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3801 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3802 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3805 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3807 int i, online_vcpus, started_vcpus = 0;
3809 if (!is_vcpu_stopped(vcpu))
3810 return;
3812 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3813 /* Only one cpu at a time may enter/leave the STOPPED state. */
3814 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3815 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3817 for (i = 0; i < online_vcpus; i++) {
3818 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3819 started_vcpus++;
3820 }
3822 if (started_vcpus == 0) {
3823 /* we're the only active VCPU -> speed it up */
3824 __enable_ibs_on_vcpu(vcpu);
3825 } else if (started_vcpus == 1) {
3827 * As we are starting a second VCPU, we have to disable
3828 * the IBS facility on all VCPUs to remove potentially
3829 * outstanding ENABLE requests.
3830 */
3831 __disable_ibs_on_all_vcpus(vcpu->kvm);
3834 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3836 * Another VCPU might have used IBS while we were offline.
3837 * Let's play safe and flush the VCPU at startup.
3839 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3840 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3841 return;
3842 }
3844 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3846 int i, online_vcpus, started_vcpus = 0;
3847 struct kvm_vcpu *started_vcpu = NULL;
3849 if (is_vcpu_stopped(vcpu))
3850 return;
3852 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3853 /* Only one cpu at a time may enter/leave the STOPPED state. */
3854 spin_lock(&vcpu->kvm->arch.start_stop_lock);
3855 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3857 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
3858 kvm_s390_clear_stop_irq(vcpu);
3860 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3861 __disable_ibs_on_vcpu(vcpu);
3863 for (i = 0; i < online_vcpus; i++) {
3864 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3865 started_vcpus++;
3866 started_vcpu = vcpu->kvm->vcpus[i];
3867 }
3868 }
3870 if (started_vcpus == 1) {
3872 * As we only have one VCPU left, we want to enable the
3873 * IBS facility for that VCPU to speed it up.
3875 __enable_ibs_on_vcpu(started_vcpu);
3878 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3879 return;
3880 }
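/*
 * The IBS toggling in the start/stop paths above is a single-CPU
 * optimization: IBS stays enabled only while exactly one VCPU is
 * running and is disabled on all VCPUs as soon as a second one starts.
 */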
3882 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3883 struct kvm_enable_cap *cap)
3884 {
3885 int r;
3887 if (cap->flags)
3888 return -EINVAL;
3890 switch (cap->cap) {
3891 case KVM_CAP_S390_CSS_SUPPORT:
3892 if (!vcpu->kvm->arch.css_support) {
3893 vcpu->kvm->arch.css_support = 1;
3894 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3895 trace_kvm_s390_enable_css(vcpu->kvm);
3896 }
3897 r = 0;
3898 break;
3899 default:
3900 r = -EINVAL;
3901 break;
3902 }
3903 return r;
3904 }
3906 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3907 struct kvm_s390_mem_op *mop)
3909 void __user *uaddr = (void __user *)mop->buf;
3910 void *tmpbuf = NULL;
3911 int r, srcu_idx;
3912 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3913 | KVM_S390_MEMOP_F_CHECK_ONLY;
3915 if (mop->flags & ~supported_flags)
3916 return -EINVAL;
3918 if (mop->size > MEM_OP_MAX_SIZE)
3919 return -E2BIG;
3921 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3922 tmpbuf = vmalloc(mop->size);
3923 if (!tmpbuf)
3924 return -ENOMEM;
3925 }
3927 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3929 switch (mop->op) {
3930 case KVM_S390_MEMOP_LOGICAL_READ:
3931 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3932 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3933 mop->size, GACC_FETCH);
3934 break;
3935 }
3936 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3937 if (r == 0) {
3938 if (copy_to_user(uaddr, tmpbuf, mop->size))
3939 r = -EFAULT;
3940 }
3941 break;
3942 case KVM_S390_MEMOP_LOGICAL_WRITE:
3943 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3944 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3945 mop->size, GACC_STORE);
3946 break;
3947 }
3948 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3949 r = -EFAULT;
3950 break;
3951 }
3952 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3953 break;
3954 default:
3955 r = -EINVAL;
3956 }
3958 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3960 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3961 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3963 vfree(tmpbuf);
3964 return r;
3965 }
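/*
 * Userspace drives the handler above through the KVM_S390_MEM_OP vcpu
 * ioctl; a minimal sketch (error handling omitted, guest_addr/len/
 * buffer/vcpu_fd are the caller's) could look like:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */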
3967 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3968 unsigned int ioctl, unsigned long arg)
3970 struct kvm_vcpu *vcpu = filp->private_data;
3971 void __user *argp = (void __user *)arg;
3973 switch (ioctl) {
3974 case KVM_S390_IRQ: {
3975 struct kvm_s390_irq s390irq;
3977 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3978 return -EFAULT;
3979 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3981 case KVM_S390_INTERRUPT: {
3982 struct kvm_s390_interrupt s390int;
3983 struct kvm_s390_irq s390irq;
3985 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3986 return -EFAULT;
3987 if (s390int_to_s390irq(&s390int, &s390irq))
3988 return -EINVAL;
3989 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3992 return -ENOIOCTLCMD;
3993 }
3995 long kvm_arch_vcpu_ioctl(struct file *filp,
3996 unsigned int ioctl, unsigned long arg)
3998 struct kvm_vcpu *vcpu = filp->private_data;
3999 void __user *argp = (void __user *)arg;
4000 int idx;
4001 long r;
4003 vcpu_load(vcpu);
4005 switch (ioctl) {
4006 case KVM_S390_STORE_STATUS:
4007 idx = srcu_read_lock(&vcpu->kvm->srcu);
4008 r = kvm_s390_vcpu_store_status(vcpu, arg);
4009 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4010 break;
4011 case KVM_S390_SET_INITIAL_PSW: {
4012 psw_t psw;
4014 r = -EFAULT;
4015 if (copy_from_user(&psw, argp, sizeof(psw)))
4016 break;
4017 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4018 break;
4019 }
4020 case KVM_S390_INITIAL_RESET:
4021 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4022 break;
4023 case KVM_SET_ONE_REG:
4024 case KVM_GET_ONE_REG: {
4025 struct kvm_one_reg reg;
4026 r = -EFAULT;
4027 if (copy_from_user(&reg, argp, sizeof(reg)))
4028 break;
4029 if (ioctl == KVM_SET_ONE_REG)
4030 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4031 else
4032 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4033 break;
4034 }
4035 #ifdef CONFIG_KVM_S390_UCONTROL
4036 case KVM_S390_UCAS_MAP: {
4037 struct kvm_s390_ucas_mapping ucasmap;
4039 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4040 r = -EFAULT;
4041 break;
4042 }
4044 if (!kvm_is_ucontrol(vcpu->kvm)) {
4045 r = -EINVAL;
4046 break;
4047 }
4049 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4050 ucasmap.vcpu_addr, ucasmap.length);
4051 break;
4052 }
4053 case KVM_S390_UCAS_UNMAP: {
4054 struct kvm_s390_ucas_mapping ucasmap;
4056 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4057 r = -EFAULT;
4058 break;
4059 }
4061 if (!kvm_is_ucontrol(vcpu->kvm)) {
4062 r = -EINVAL;
4063 break;
4064 }
4066 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4067 ucasmap.length);
4068 break;
4069 }
4071 case KVM_S390_VCPU_FAULT: {
4072 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4073 break;
4074 }
4075 case KVM_ENABLE_CAP:
4077 struct kvm_enable_cap cap;
4078 r = -EFAULT;
4079 if (copy_from_user(&cap, argp, sizeof(cap)))
4080 break;
4081 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4082 break;
4083 }
4084 case KVM_S390_MEM_OP: {
4085 struct kvm_s390_mem_op mem_op;
4087 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4088 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4089 else
4090 r = -EFAULT;
4091 break;
4092 }
4093 case KVM_S390_SET_IRQ_STATE: {
4094 struct kvm_s390_irq_state irq_state;
4096 r = -EFAULT;
4097 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4098 break;
4099 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4100 irq_state.len == 0 ||
4101 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4102 r = -EINVAL;
4103 break;
4104 }
4105 /* do not use irq_state.flags, it will break old QEMUs */
4106 r = kvm_s390_set_irq_state(vcpu,
4107 (void __user *) irq_state.buf,
4108 irq_state.len);
4109 break;
4110 }
4111 case KVM_S390_GET_IRQ_STATE: {
4112 struct kvm_s390_irq_state irq_state;
4114 r = -EFAULT;
4115 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4116 break;
4117 if (irq_state.len == 0) {
4118 r = -EINVAL;
4119 break;
4120 }
4121 /* do not use irq_state.flags, it will break old QEMUs */
4122 r = kvm_s390_get_irq_state(vcpu,
4123 (__u8 __user *) irq_state.buf,
4124 irq_state.len);
4125 break;
4126 }
4127 default:
4128 r = -ENOTTY;
4129 }
4131 vcpu_put(vcpu);
4132 return r;
4133 }
4135 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4137 #ifdef CONFIG_KVM_S390_UCONTROL
4138 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4139 && (kvm_is_ucontrol(vcpu->kvm))) {
4140 vmf->page = virt_to_page(vcpu->arch.sie_block);
4141 get_page(vmf->page);
4142 return 0;
4143 }
4144 #endif
4145 return VM_FAULT_SIGBUS;
4148 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4149 unsigned long npages)
4150 {
4151 return 0;
4152 }
4154 /* Section: memory related */
4155 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4156 struct kvm_memory_slot *memslot,
4157 const struct kvm_userspace_memory_region *mem,
4158 enum kvm_mr_change change)
4160 /* A few sanity checks. Memory slots have to start and end at a segment
4161 boundary (1MB). The memory in userland may be fragmented into various
4162 different vmas. It is okay to mmap() and munmap() stuff in this slot
4163 at any time after doing this call */
4165 if (mem->userspace_addr & 0xffffful)
4166 return -EINVAL;
4168 if (mem->memory_size & 0xffffful)
4169 return -EINVAL;
4171 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4172 return -EINVAL;
4174 return 0;
4175 }
4177 void kvm_arch_commit_memory_region(struct kvm *kvm,
4178 const struct kvm_userspace_memory_region *mem,
4179 const struct kvm_memory_slot *old,
4180 const struct kvm_memory_slot *new,
4181 enum kvm_mr_change change)
4183 int rc;
4185 /* If the basics of the memslot do not change, we do not want
4186 * to update the gmap. Every update causes several unnecessary
4187 * segment translation exceptions. This is usually handled just
4188 * fine by the normal fault handler + gmap, but it will also
4189 * cause faults on the prefix page of running guest CPUs.
4190 */
4191 if (old->userspace_addr == mem->userspace_addr &&
4192 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4193 old->npages * PAGE_SIZE == mem->memory_size)
4194 return;
4196 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4197 mem->guest_phys_addr, mem->memory_size);
4198 if (rc)
4199 pr_warn("failed to commit memory region\n");
4200 return;
4201 }
4203 static inline unsigned long nonhyp_mask(int i)
4205 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4207 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4210 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4212 vcpu->valid_wakeup = false;
4215 static int __init kvm_s390_init(void)
4216 {
4217 int i;
4219 if (!sclp.has_sief2) {
4220 pr_info("SIE not available\n");
4221 return -ENODEV;
4222 }
4224 if (nested && hpage) {
4225 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently");
4226 return -EINVAL;
4227 }
4229 for (i = 0; i < 16; i++)
4230 kvm_s390_fac_base[i] |=
4231 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4233 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4234 }
4236 static void __exit kvm_s390_exit(void)
4237 {
4238 kvm_exit();
4239 }
4241 module_init(kvm_s390_init);
4242 module_exit(kvm_s390_exit);
4245 * Enable autoloading of the kvm module.
4246 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4247 * since x86 takes a different approach.
4249 #include <linux/miscdevice.h>
4250 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4251 MODULE_ALIAS("devname:kvm");