// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

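/*
 * Statistics exported via the generic KVM debugfs interface. Each entry
 * maps a file under /sys/kernel/debug/kvm to the offset of a counter in
 * struct kvm_vcpu (VCPU_STAT) or struct kvm (VM_STAT).
 */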
struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_io_request", VCPU_STAT(exit_io_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_ckc", VCPU_STAT(deliver_ckc) },
        { "deliver_cputm", VCPU_STAT(deliver_cputm) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio", VCPU_STAT(deliver_virtio) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program", VCPU_STAT(deliver_program) },
        { "deliver_io", VCPU_STAT(deliver_io) },
        { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "inject_ckc", VCPU_STAT(inject_ckc) },
        { "inject_cputm", VCPU_STAT(inject_cputm) },
        { "inject_external_call", VCPU_STAT(inject_external_call) },
        { "inject_float_mchk", VM_STAT(inject_float_mchk) },
        { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
        { "inject_io", VM_STAT(inject_io) },
        { "inject_mchk", VCPU_STAT(inject_mchk) },
        { "inject_pfault_done", VM_STAT(inject_pfault_done) },
        { "inject_program", VCPU_STAT(inject_program) },
        { "inject_restart", VCPU_STAT(inject_restart) },
        { "inject_service_signal", VM_STAT(inject_service_signal) },
        { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
        { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
        { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
        { "inject_virtio", VM_STAT(inject_virtio) },
        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
        { "instruction_gs", VCPU_STAT(instruction_gs) },
        { "instruction_io_other", VCPU_STAT(instruction_io_other) },
        { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
        { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_ptff", VCPU_STAT(instruction_ptff) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_sck", VCPU_STAT(instruction_sck) },
        { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_iske", VCPU_STAT(instruction_iske) },
        { "instruction_ri", VCPU_STAT(instruction_ri) },
        { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
        { "instruction_sske", VCPU_STAT(instruction_sske) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tb", VCPU_STAT(instruction_tb) },
        { "instruction_tpi", VCPU_STAT(instruction_tpi) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_tsch", VCPU_STAT(instruction_tsch) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "instruction_diag_10", VCPU_STAT(diagnose_10) },
        { "instruction_diag_44", VCPU_STAT(diagnose_44) },
        { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
        { "instruction_diag_258", VCPU_STAT(diagnose_258) },
        { "instruction_diag_308", VCPU_STAT(diagnose_308) },
        { "instruction_diag_500", VCPU_STAT(diagnose_500) },
        { "instruction_diag_other", VCPU_STAT(diagnose_other) },
        { NULL }
};

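/*
 * Mirrors the 128-bit extended TOD clock as stored by get_tod_clock_ext()
 * (STORE CLOCK EXTENDED): an epoch-index byte, the 64-bit TOD value and a
 * reserved remainder.
 */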
struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. Going beyond this would
 * require code changes, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base facility mask that defines the default facilities. It consists of
 * the defines in FACILITIES_KVM and the non-hypervisor-managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended facility mask. It consists of the defines in
 * FACILITIES_KVM_CPUMODEL and defines the facilities that can be enabled
 * via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                sizeof(S390_lowcore.stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);

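/*
 * Adjust the epoch (and, with the multiple-epoch facility, the epoch index)
 * in a SIE control block after the host TOD clock jumped by @delta, so the
 * guest-observed TOD stays unchanged.
 */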
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta, we have to compensate this by adding
         * -delta to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

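/*
 * PERFORM LOCKED OPERATION "test bit": with bit 0x100 set in the
 * function-code register, PLO only tests whether function @nr is
 * installed. Returns 1 if the function is available.
 */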
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}

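/*
 * Probe the host for the subfunctions (PLO, PTFF, CPACF queries) and SIE
 * features KVM can offer to guests. SIE feature bits are only advertised
 * when nested virtualization is enabled and the minimum requirements for
 * shadowing (SIEF2, ESOP, 64-bit SCAO, IDTE) are met.
 */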
static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
         * pages to be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
                debug_unregister(kvm_s390_dbf);
                return -ENOMEM;
        }

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
        debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        default:
                r = 0;
        }
        return r;
}

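/*
 * Walk a memslot in segment-sized steps of _PAGE_ENTRIES pages, fetch the
 * dirty bits collected in the gmap and transfer them into the memslot's
 * dirty bitmap via mark_page_dirty().
 */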
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        slots = kvm_memslots(kvm);
        memslot = id_to_memslot(slots, log->slot);
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;

        kvm_s390_sync_dirty_log(kvm, memslot);
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

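/*
 * Handler behind the KVM_ENABLE_CAP ioctl on the VM fd. A minimal
 * userspace sketch (hypothetical caller, error handling omitted, vm_fd
 * assumed to be an open VM file descriptor), e.g. to enable the
 * in-kernel interrupt controller:
 *
 *      struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_IRQCHIP };
 *      ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */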
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;
                        down_write(&kvm->mm->mmap_sem);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        up_write(&kvm->mm->mmap_sem);
                        /*
                         * We might have to create fake 4k page tables.
                         * To prevent the hardware from working on stale
                         * PGSTEs, we emulate the affected instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* PFMF interpretation is not compatible with CMMA. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes the last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                /* recreate the shadow crycb by leaving the VSIE handler */
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

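/*
 * Enable or disable AES/DEA key wrapping (generating a fresh random
 * wrapping key mask on enable) or AP instruction interpretation, then
 * force all VCPUs to rebuild their crypto control blocks.
 */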
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 0;
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                ms = slots->memslots + slotnr;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

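/*
 * Dispatcher for the KVM_S390_VM_MIGRATION attribute group. A hedged
 * userspace sketch (vm_fd assumed to be an open VM file descriptor) to
 * switch migration mode on:
 *
 *      struct kvm_device_attr attr = {
 *              .group = KVM_S390_VM_MIGRATION,
 *              .attr  = KVM_S390_VM_MIGRATION_START,
 *      };
 *      ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */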
static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

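/*
 * Dispatcher for setting the guest TOD clock, reached via
 * KVM_SET_DEVICE_ATTR on the VM fd. A minimal sketch (tod assumed to
 * hold the new 64-bit TOD base):
 *
 *      struct kvm_device_attr attr = {
 *              .group = KVM_S390_VM_TOD,
 *              .attr  = KVM_S390_VM_TOD_LOW,
 *              .addr  = (__u64)&tod,
 *      };
 *      ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */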
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

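/*
 * Compute the guest view of the extended TOD clock: host TOD plus the
 * guest epoch, carrying into the epoch index when the addition wraps and
 * the multiple-epoch facility (139) is available.
 */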
static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
                if (gtod->tod < htod.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

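/*
 * Set the guest CPU model (cpuid, IBC, facility list). The requested IBC
 * is clamped against the SCLP-reported range: values above the unblocked
 * IBC are lowered, values below the lowest IBC are raised.
 */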
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                mutex_unlock(&kvm->lock);
                return -EBUSY;
        }
        bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        mutex_unlock(&kvm->lock);
        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once supported by kernel + hw, we have to store the subfunctions
         * in kvm->arch and remember that user space configured them.
         */
        return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                 kvm->arch.model.fac_list[0],
                 kvm->arch.model.fac_list[1],
                 kvm->arch.model.fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
        return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_machine *mach;
        int ret = 0;

        mach = kzalloc(sizeof(*mach), GFP_KERNEL);
        if (!mach) {
                ret = -ENOMEM;
                goto out;
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp.ibc;
        memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
               sizeof(S390_lowcore.stfle_fac_list));
        VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_mask[0],
                 mach->fac_mask[1],
                 mach->fac_mask[2]);
        VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
                 mach->fac_list[0],
                 mach->fac_list[1],
                 mach->fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
out:
        return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        bitmap_copy((unsigned long *) data.feat,
                    kvm_s390_available_cpu_feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        /*
         * Once we can actually configure subfunctions (kernel + hw support),
         * we have to check if they were already set by user space, if so copy
         * them from kvm->arch.
         */
        return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
                                        struct kvm_device_attr *attr)
{
        if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
            sizeof(struct kvm_s390_vm_cpu_subfunc)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_get_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE:
                ret = kvm_s390_get_machine(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_get_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_FEAT:
                ret = kvm_s390_get_machine_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_get_processor_subfunc(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
                ret = kvm_s390_get_machine_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_set_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_set_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_set_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_CRYPTO:
                ret = kvm_s390_vm_set_crypto(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_set_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                ret = kvm_s390_get_mem_control(kvm, attr);
                break;
        case KVM_S390_VM_TOD:
                ret = kvm_s390_get_tod(kvm, attr);
                break;
        case KVM_S390_VM_CPU_MODEL:
                ret = kvm_s390_get_cpu_model(kvm, attr);
                break;
        case KVM_S390_VM_MIGRATION:
                ret = kvm_s390_vm_get_migration(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }

        return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->group) {
        case KVM_S390_VM_MEM_CTRL:
                switch (attr->attr) {
                case KVM_S390_VM_MEM_ENABLE_CMMA:
                case KVM_S390_VM_MEM_CLR_CMMA:
                        ret = sclp.has_cmma ? 0 : -ENXIO;
                        break;
                case KVM_S390_VM_MEM_LIMIT_SIZE:
                        ret = 0;
                        break;
                default:
                        ret = -ENXIO;
                        break;
                }
                break;
        case KVM_S390_VM_TOD:
                switch (attr->attr) {
                case KVM_S390_VM_TOD_LOW:
                case KVM_S390_VM_TOD_HIGH:
1494                         ret = 0;
1495                         break;
1496                 default:
1497                         ret = -ENXIO;
1498                         break;
1499                 }
1500                 break;
1501         case KVM_S390_VM_CPU_MODEL:
1502                 switch (attr->attr) {
1503                 case KVM_S390_VM_CPU_PROCESSOR:
1504                 case KVM_S390_VM_CPU_MACHINE:
1505                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1506                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1507                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1508                         ret = 0;
1509                         break;
1510                 /* configuring subfunctions is not supported yet */
1511                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1512                 default:
1513                         ret = -ENXIO;
1514                         break;
1515                 }
1516                 break;
1517         case KVM_S390_VM_CRYPTO:
1518                 switch (attr->attr) {
1519                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1520                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1521                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1522                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1523                         ret = 0;
1524                         break;
1525                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1526                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1527                         ret = ap_instructions_available() ? 0 : -ENXIO;
1528                         break;
1529                 default:
1530                         ret = -ENXIO;
1531                         break;
1532                 }
1533                 break;
1534         case KVM_S390_VM_MIGRATION:
1535                 ret = 0;
1536                 break;
1537         default:
1538                 ret = -ENXIO;
1539                 break;
1540         }
1541
1542         return ret;
1543 }
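/*
 * Illustration added for clarity, not part of the original source: a
 * minimal userspace sketch of probing an attribute group with
 * KVM_HAS_DEVICE_ATTR before using it.  It assumes vm_fd came from a
 * prior KVM_CREATE_VM; the #if 0 guard keeps this kernel file intact.
 */
#if 0
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int vm_migration_mode_supported(int vm_fd)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_MIGRATION,
		.attr  = KVM_S390_VM_MIGRATION_START,
	};

	/* 0 if the attribute exists, -1 with errno ENXIO if it does not */
	return ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0;
}
#endif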
1544
1545 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1546 {
1547         uint8_t *keys;
1548         uint64_t hva;
1549         int srcu_idx, i, r = 0;
1550
1551         if (args->flags != 0)
1552                 return -EINVAL;
1553
1554         /* Is this guest using storage keys? */
1555         if (!mm_uses_skeys(current->mm))
1556                 return KVM_S390_GET_SKEYS_NONE;
1557
1558         /* Enforce sane limit on memory allocation */
1559         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1560                 return -EINVAL;
1561
1562         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1563         if (!keys)
1564                 return -ENOMEM;
1565
1566         down_read(&current->mm->mmap_sem);
1567         srcu_idx = srcu_read_lock(&kvm->srcu);
1568         for (i = 0; i < args->count; i++) {
1569                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1570                 if (kvm_is_error_hva(hva)) {
1571                         r = -EFAULT;
1572                         break;
1573                 }
1574
1575                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1576                 if (r)
1577                         break;
1578         }
1579         srcu_read_unlock(&kvm->srcu, srcu_idx);
1580         up_read(&current->mm->mmap_sem);
1581
1582         if (!r) {
1583                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1584                                  sizeof(uint8_t) * args->count);
1585                 if (r)
1586                         r = -EFAULT;
1587         }
1588
1589         kvfree(keys);
1590         return r;
1591 }
1592
1593 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1594 {
1595         uint8_t *keys;
1596         uint64_t hva;
1597         int srcu_idx, i, r = 0;
1598         bool unlocked;
1599
1600         if (args->flags != 0)
1601                 return -EINVAL;
1602
1603         /* Enforce sane limit on memory allocation */
1604         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1605                 return -EINVAL;
1606
1607         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1608         if (!keys)
1609                 return -ENOMEM;
1610
1611         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1612                            sizeof(uint8_t) * args->count);
1613         if (r) {
1614                 r = -EFAULT;
1615                 goto out;
1616         }
1617
1618         /* Enable storage key handling for the guest */
1619         r = s390_enable_skey();
1620         if (r)
1621                 goto out;
1622
1623         i = 0;
1624         down_read(&current->mm->mmap_sem);
1625         srcu_idx = srcu_read_lock(&kvm->srcu);
1626         while (i < args->count) {
1627                 unlocked = false;
1628                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1629                 if (kvm_is_error_hva(hva)) {
1630                         r = -EFAULT;
1631                         break;
1632                 }
1633
1634                 /* Lowest order bit is reserved */
1635                 if (keys[i] & 0x01) {
1636                         r = -EINVAL;
1637                         break;
1638                 }
1639
1640                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1641                 if (r) {
1642                         r = fixup_user_fault(current, current->mm, hva,
1643                                              FAULT_FLAG_WRITE, &unlocked);
1644                         if (r)
1645                                 break;
1646                 }
1647                 if (!r)
1648                         i++;
1649         }
1650         srcu_read_unlock(&kvm->srcu, srcu_idx);
1651         up_read(&current->mm->mmap_sem);
1652 out:
1653         kvfree(keys);
1654         return r;
1655 }
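/*
 * Illustration added for clarity, not part of the original source: a
 * hedged userspace sketch of the storage key ioctls handled above.  It
 * assumes vm_fd and a caller-provided buffer; a positive return of
 * KVM_S390_GET_SKEYS_NONE means the guest never used storage keys.
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* read "count" storage keys, one byte per page, starting at start_gfn */
static long read_guest_skeys(int vm_fd, uint64_t start_gfn,
			     uint8_t *buf, uint64_t count)
{
	struct kvm_s390_skeys args = {
		.start_gfn     = start_gfn,
		.count         = count,		/* 1..KVM_S390_SKEYS_MAX */
		.skeydata_addr = (uint64_t)(unsigned long)buf,
	};

	return ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
}
#endif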
1656
1657 /*
1658  * Base address and length must be sent at the start of each block, therefore
1659  * it's cheaper to send some clean data, as long as it's less than the size of
1660  * two longs.
1661  */
1662 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1663 /* reuse the storage key limit, for consistency */
1664 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
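/*
 * Worked example, added for clarity: with 8-byte pointers the per-block
 * overhead is one base address plus one length, i.e. 2 * 8 = 16 bytes.
 * Since every result byte describes one page, a run of up to 16 clean
 * pages between two dirty ones is cheaper to transmit inline than
 * closing the block and starting a new one.
 */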
1665
1666 /*
1667  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1668  * address falls in a hole. In that case the index of one of the memslots
1669  * bordering the hole is returned.
1670  */
1671 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1672 {
1673         int start = 0, end = slots->used_slots;
1674         int slot = atomic_read(&slots->lru_slot);
1675         struct kvm_memory_slot *memslots = slots->memslots;
1676
1677         if (gfn >= memslots[slot].base_gfn &&
1678             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1679                 return slot;
1680
1681         while (start < end) {
1682                 slot = start + (end - start) / 2;
1683
1684                 if (gfn >= memslots[slot].base_gfn)
1685                         end = slot;
1686                 else
1687                         start = slot + 1;
1688         }
1689
1690         if (gfn >= memslots[start].base_gfn &&
1691             gfn < memslots[start].base_gfn + memslots[start].npages) {
1692                 atomic_set(&slots->lru_slot, start);
1693         }
1694
1695         return start;
1696 }
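/*
 * Worked example, added for clarity: KVM keeps the memslot array sorted
 * by base_gfn in descending order, which the bisection above relies on.
 * With slots covering gfns [0x200, 0x300) and [0x000, 0x100), looking up
 * gfn 0x180 hits the hole in between; the loop terminates on the lower
 * bordering slot, and callers such as kvm_s390_next_dirty_cmma() must
 * cope with a gfn that lies outside the returned slot.
 */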
1697
1698 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1699                               u8 *res, unsigned long bufsize)
1700 {
1701         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1702
1703         args->count = 0;
1704         while (args->count < bufsize) {
1705                 hva = gfn_to_hva(kvm, cur_gfn);
1706                 /*
1707                  * We return an error if the first value was invalid, but we
1708                  * return successfully if at least one value was copied.
1709                  */
1710                 if (kvm_is_error_hva(hva))
1711                         return args->count ? 0 : -EFAULT;
1712                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1713                         pgstev = 0;
1714                 res[args->count++] = (pgstev >> 24) & 0x43;
1715                 cur_gfn++;
1716         }
1717
1718         return 0;
1719 }
1720
1721 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1722                                               unsigned long cur_gfn)
1723 {
1724         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1725         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1726         unsigned long ofs = cur_gfn - ms->base_gfn;
1727
1728         if (ms->base_gfn + ms->npages <= cur_gfn) {
1729                 slotidx--;
1730                 /* If we are above the highest slot, wrap around */
1731                 if (slotidx < 0)
1732                         slotidx = slots->used_slots - 1;
1733
1734                 ms = slots->memslots + slotidx;
1735                 ofs = 0;
1736         }
1737         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1738         while ((slotidx > 0) && (ofs >= ms->npages)) {
1739                 slotidx--;
1740                 ms = slots->memslots + slotidx;
1741                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1742         }
1743         return ms->base_gfn + ofs;
1744 }
1745
1746 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1747                              u8 *res, unsigned long bufsize)
1748 {
1749         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1750         struct kvm_memslots *slots = kvm_memslots(kvm);
1751         struct kvm_memory_slot *ms;
1752
1753         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1754         ms = gfn_to_memslot(kvm, cur_gfn);
1755         args->count = 0;
1756         args->start_gfn = cur_gfn;
1757         if (!ms)
1758                 return 0;
1759         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1760         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1761
1762         while (args->count < bufsize) {
1763                 hva = gfn_to_hva(kvm, cur_gfn);
1764                 if (kvm_is_error_hva(hva))
1765                         return 0;
1766                 /* Decrement only if we actually flipped the bit to 0 */
1767                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1768                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1769                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1770                         pgstev = 0;
1771                 /* Save the value */
1772                 res[args->count++] = (pgstev >> 24) & 0x43;
1773                 /* If the next bit is too far away, stop. */
1774                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1775                         return 0;
1776                 /* If we reached the previous "next", find the next one */
1777                 if (cur_gfn == next_gfn)
1778                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1779                 /* Reached the end of memory or of the buffer, stop */
1780                 if ((next_gfn >= mem_end) ||
1781                     (next_gfn - args->start_gfn >= bufsize))
1782                         return 0;
1783                 cur_gfn++;
1784                 /* Reached the end of the current memslot, take the next one. */
1785                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1786                         ms = gfn_to_memslot(kvm, cur_gfn);
1787                         if (!ms)
1788                                 return 0;
1789                 }
1790         }
1791         return 0;
1792 }
1793
1794 /*
1795  * This function searches for the next page with dirty CMMA attributes, and
1796  * saves the attributes in the buffer up to either the end of the buffer or
1797  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1798  * no trailing clean bytes are saved.
1799  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1800  * output buffer will indicate 0 as length.
1801  */
1802 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1803                                   struct kvm_s390_cmma_log *args)
1804 {
1805         unsigned long bufsize;
1806         int srcu_idx, peek, ret;
1807         u8 *values;
1808
1809         if (!kvm->arch.use_cmma)
1810                 return -ENXIO;
1811         /* Invalid/unsupported flags were specified */
1812         if (args->flags & ~KVM_S390_CMMA_PEEK)
1813                 return -EINVAL;
1814         /* Migration mode query, and we are not doing a migration */
1815         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1816         if (!peek && !kvm->arch.migration_mode)
1817                 return -EINVAL;
1818         /* CMMA is disabled or was not used, or the buffer has length zero */
1819         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1820         if (!bufsize || !kvm->mm->context.uses_cmm) {
1821                 memset(args, 0, sizeof(*args));
1822                 return 0;
1823         }
1824         /* We are not peeking, and there are no dirty pages */
1825         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1826                 memset(args, 0, sizeof(*args));
1827                 return 0;
1828         }
1829
1830         values = vmalloc(bufsize);
1831         if (!values)
1832                 return -ENOMEM;
1833
1834         down_read(&kvm->mm->mmap_sem);
1835         srcu_idx = srcu_read_lock(&kvm->srcu);
1836         if (peek)
1837                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1838         else
1839                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1840         srcu_read_unlock(&kvm->srcu, srcu_idx);
1841         up_read(&kvm->mm->mmap_sem);
1842
1843         if (kvm->arch.migration_mode)
1844                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1845         else
1846                 args->remaining = 0;
1847
1848         if (copy_to_user((void __user *)args->values, values, args->count))
1849                 ret = -EFAULT;
1850
1851         vfree(values);
1852         return ret;
1853 }
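/*
 * Illustration added for clarity, not part of the original source: a
 * hedged sketch of the userspace side of KVM_S390_GET_CMMA_BITS in
 * migration mode (no KVM_S390_CMMA_PEEK).  The buffer size and the
 * (absent) error handling are illustrative only; the kernel caps the
 * count internally.
 */
#if 0
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#define CMMA_BUF_SIZE (1U << 20)	/* arbitrary example size */

static void drain_cmma_bits(int vm_fd)
{
	uint8_t *buf = malloc(CMMA_BUF_SIZE);
	struct kvm_s390_cmma_log args = {
		.start_gfn = 0,
		.count     = CMMA_BUF_SIZE,
		.flags     = 0,		/* migration mode, not a peek */
		.values    = (uint64_t)(unsigned long)buf,
	};

	do {
		if (!buf || ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &args) < 0)
			break;
		/*
		 * buf[0..args.count - 1] now holds one value per page,
		 * starting at the start_gfn the kernel wrote back.
		 */
		args.start_gfn += args.count;
		args.count = CMMA_BUF_SIZE;
	} while (args.remaining);
	free(buf);
}
#endif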
1854
1855 /*
1856  * This function sets the CMMA attributes for the given pages. If the input
1857  * buffer has zero length, no action is taken, otherwise the attributes are
1858  * set and the mm->context.uses_cmm flag is set.
1859  */
1860 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1861                                   const struct kvm_s390_cmma_log *args)
1862 {
1863         unsigned long hva, mask, pgstev, i;
1864         uint8_t *bits;
1865         int srcu_idx, r = 0;
1866
1867         mask = args->mask;
1868
1869         if (!kvm->arch.use_cmma)
1870                 return -ENXIO;
1871         /* invalid/unsupported flags */
1872         if (args->flags != 0)
1873                 return -EINVAL;
1874         /* Enforce sane limit on memory allocation */
1875         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1876                 return -EINVAL;
1877         /* Nothing to do */
1878         if (args->count == 0)
1879                 return 0;
1880
1881         bits = vmalloc(array_size(sizeof(*bits), args->count));
1882         if (!bits)
1883                 return -ENOMEM;
1884
1885         r = copy_from_user(bits, (void __user *)args->values, args->count);
1886         if (r) {
1887                 r = -EFAULT;
1888                 goto out;
1889         }
1890
1891         down_read(&kvm->mm->mmap_sem);
1892         srcu_idx = srcu_read_lock(&kvm->srcu);
1893         for (i = 0; i < args->count; i++) {
1894                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1895                 if (kvm_is_error_hva(hva)) {
1896                         r = -EFAULT;
1897                         break;
1898                 }
1899
1900                 pgstev = bits[i];
1901                 pgstev = pgstev << 24;
1902                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1903                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1904         }
1905         srcu_read_unlock(&kvm->srcu, srcu_idx);
1906         up_read(&kvm->mm->mmap_sem);
1907
1908         if (!kvm->mm->context.uses_cmm) {
1909                 down_write(&kvm->mm->mmap_sem);
1910                 kvm->mm->context.uses_cmm = 1;
1911                 up_write(&kvm->mm->mmap_sem);
1912         }
1913 out:
1914         vfree(bits);
1915         return r;
1916 }
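/*
 * Illustration added for clarity, not part of the original source: the
 * inbound counterpart used on a migration target.  "values" holds one
 * CMMA value per page and "mask" selects which bits of each value the
 * kernel applies; all names besides the UAPI ones are made up.
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int restore_cmma_bits(int vm_fd, uint64_t start_gfn,
			     uint8_t *vals, uint32_t count)
{
	struct kvm_s390_cmma_log args = {
		.start_gfn = start_gfn,
		.count     = count,
		.flags     = 0,
		.mask      = ~0ULL,	/* apply all value bits */
		.values    = (uint64_t)(unsigned long)vals,
	};

	return ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &args);
}
#endif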
1917
1918 long kvm_arch_vm_ioctl(struct file *filp,
1919                        unsigned int ioctl, unsigned long arg)
1920 {
1921         struct kvm *kvm = filp->private_data;
1922         void __user *argp = (void __user *)arg;
1923         struct kvm_device_attr attr;
1924         int r;
1925
1926         switch (ioctl) {
1927         case KVM_S390_INTERRUPT: {
1928                 struct kvm_s390_interrupt s390int;
1929
1930                 r = -EFAULT;
1931                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1932                         break;
1933                 r = kvm_s390_inject_vm(kvm, &s390int);
1934                 break;
1935         }
1936         case KVM_ENABLE_CAP: {
1937                 struct kvm_enable_cap cap;

1938                 r = -EFAULT;
1939                 if (copy_from_user(&cap, argp, sizeof(cap)))
1940                         break;
1941                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1942                 break;
1943         }
1944         case KVM_CREATE_IRQCHIP: {
1945                 struct kvm_irq_routing_entry routing;
1946
1947                 r = -EINVAL;
1948                 if (kvm->arch.use_irqchip) {
1949                         /* Set up dummy routing. */
1950                         memset(&routing, 0, sizeof(routing));
1951                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1952                 }
1953                 break;
1954         }
1955         case KVM_SET_DEVICE_ATTR: {
1956                 r = -EFAULT;
1957                 if (copy_from_user(&attr, argp, sizeof(attr)))
1958                         break;
1959                 r = kvm_s390_vm_set_attr(kvm, &attr);
1960                 break;
1961         }
1962         case KVM_GET_DEVICE_ATTR: {
1963                 r = -EFAULT;
1964                 if (copy_from_user(&attr, argp, sizeof(attr)))
1965                         break;
1966                 r = kvm_s390_vm_get_attr(kvm, &attr);
1967                 break;
1968         }
1969         case KVM_HAS_DEVICE_ATTR: {
1970                 r = -EFAULT;
1971                 if (copy_from_user(&attr, argp, sizeof(attr)))
1972                         break;
1973                 r = kvm_s390_vm_has_attr(kvm, &attr);
1974                 break;
1975         }
1976         case KVM_S390_GET_SKEYS: {
1977                 struct kvm_s390_skeys args;
1978
1979                 r = -EFAULT;
1980                 if (copy_from_user(&args, argp,
1981                                    sizeof(struct kvm_s390_skeys)))
1982                         break;
1983                 r = kvm_s390_get_skeys(kvm, &args);
1984                 break;
1985         }
1986         case KVM_S390_SET_SKEYS: {
1987                 struct kvm_s390_skeys args;
1988
1989                 r = -EFAULT;
1990                 if (copy_from_user(&args, argp,
1991                                    sizeof(struct kvm_s390_skeys)))
1992                         break;
1993                 r = kvm_s390_set_skeys(kvm, &args);
1994                 break;
1995         }
1996         case KVM_S390_GET_CMMA_BITS: {
1997                 struct kvm_s390_cmma_log args;
1998
1999                 r = -EFAULT;
2000                 if (copy_from_user(&args, argp, sizeof(args)))
2001                         break;
2002                 mutex_lock(&kvm->slots_lock);
2003                 r = kvm_s390_get_cmma_bits(kvm, &args);
2004                 mutex_unlock(&kvm->slots_lock);
2005                 if (!r) {
2006                         r = copy_to_user(argp, &args, sizeof(args));
2007                         if (r)
2008                                 r = -EFAULT;
2009                 }
2010                 break;
2011         }
2012         case KVM_S390_SET_CMMA_BITS: {
2013                 struct kvm_s390_cmma_log args;
2014
2015                 r = -EFAULT;
2016                 if (copy_from_user(&args, argp, sizeof(args)))
2017                         break;
2018                 mutex_lock(&kvm->slots_lock);
2019                 r = kvm_s390_set_cmma_bits(kvm, &args);
2020                 mutex_unlock(&kvm->slots_lock);
2021                 break;
2022         }
2023         default:
2024                 r = -ENOTTY;
2025         }
2026
2027         return r;
2028 }
2029
2030 static int kvm_s390_apxa_installed(void)
2031 {
2032         struct ap_config_info info;
2033
2034         if (ap_instructions_available()) {
2035                 if (ap_qci(&info) == 0)
2036                         return info.apxa;
2037         }
2038
2039         return 0;
2040 }
2041
2042 /*
2043  * The format of the crypto control block (CRYCB) is specified in the 3 low
2044  * order bits of the CRYCB designation (CRYCBD) field as follows:
2045  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2046  *           AP extended addressing (APXA) facility is installed.
2047  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2048  * Format 2: Both the APXA and MSAX3 facilities are installed.
2049  */
2050 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2051 {
2052         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2053
2054         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2055         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2056
2057         /* Check whether MSAX3 is installed */
2058         if (!test_kvm_facility(kvm, 76))
2059                 return;
2060
2061         if (kvm_s390_apxa_installed())
2062                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2063         else
2064                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2065 }
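/*
 * Illustration added for clarity, not part of the original source: the
 * format chosen above can be decoded from the low CRYCBD bits again, as
 * this hypothetical helper sketches (it assumes the CRYCB_FORMAT*
 * constants already used above).
 */
#if 0
static const char *crycb_format_name(u32 crycbd)
{
	switch (crycbd & CRYCB_FORMAT_MASK) {
	case CRYCB_FORMAT2:
		return "format-2 (MSAX3 and APXA)";
	case CRYCB_FORMAT1:
		return "format-1 (MSAX3 only)";
	default:
		return "format-0 (neither)";
	}
}
#endif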
2066
2067 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2068                                unsigned long *aqm, unsigned long *adm)
2069 {
2070         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2071
2072         mutex_lock(&kvm->lock);
2073         kvm_s390_vcpu_block_all(kvm);
2074
2075         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2076         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2077                 memcpy(crycb->apcb1.apm, apm, 32);
2078                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2079                          apm[0], apm[1], apm[2], apm[3]);
2080                 memcpy(crycb->apcb1.aqm, aqm, 32);
2081                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2082                          aqm[0], aqm[1], aqm[2], aqm[3]);
2083                 memcpy(crycb->apcb1.adm, adm, 32);
2084                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2085                          adm[0], adm[1], adm[2], adm[3]);
2086                 break;
2087         case CRYCB_FORMAT1:
2088         case CRYCB_FORMAT0: /* Fall through: both use APCB0 */
2089                 memcpy(crycb->apcb0.apm, apm, 8);
2090                 memcpy(crycb->apcb0.aqm, aqm, 2);
2091                 memcpy(crycb->apcb0.adm, adm, 2);
2092                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2093                          apm[0], *((unsigned short *)aqm),
2094                          *((unsigned short *)adm));
2095                 break;
2096         default:        /* Cannot happen */
2097                 break;
2098         }
2099
2100         /* recreate the shadow crycb for each vcpu */
2101         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2102         kvm_s390_vcpu_unblock_all(kvm);
2103         mutex_unlock(&kvm->lock);
2104 }
2105 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2106
2107 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2108 {
2109         mutex_lock(&kvm->lock);
2110         kvm_s390_vcpu_block_all(kvm);
2111
2112         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2113                sizeof(kvm->arch.crypto.crycb->apcb0));
2114         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2115                sizeof(kvm->arch.crypto.crycb->apcb1));
2116
2117         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2118         /* recreate the shadow crycb for each vcpu */
2119         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2120         kvm_s390_vcpu_unblock_all(kvm);
2121         mutex_unlock(&kvm->lock);
2122 }
2123 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2124
2125 static u64 kvm_s390_get_initial_cpuid(void)
2126 {
2127         struct cpuid cpuid;
2128
2129         get_cpu_id(&cpuid);
2130         cpuid.version = 0xff;
2131         return *((u64 *) &cpuid);
2132 }
2133
2134 static void kvm_s390_crypto_init(struct kvm *kvm)
2135 {
2136         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2137         kvm_s390_set_crycb_format(kvm);
2138
2139         if (!test_kvm_facility(kvm, 76))
2140                 return;
2141
2142         /* Enable AES/DEA protected key functions by default */
2143         kvm->arch.crypto.aes_kw = 1;
2144         kvm->arch.crypto.dea_kw = 1;
2145         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2146                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2147         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2148                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2149 }
2150
2151 static void sca_dispose(struct kvm *kvm)
2152 {
2153         if (kvm->arch.use_esca)
2154                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2155         else
2156                 free_page((unsigned long)(kvm->arch.sca));
2157         kvm->arch.sca = NULL;
2158 }
2159
2160 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2161 {
2162         gfp_t alloc_flags = GFP_KERNEL;
2163         int i, rc;
2164         char debug_name[16];
2165         static unsigned long sca_offset;
2166
2167         rc = -EINVAL;
2168 #ifdef CONFIG_KVM_S390_UCONTROL
2169         if (type & ~KVM_VM_S390_UCONTROL)
2170                 goto out_err;
2171         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2172                 goto out_err;
2173 #else
2174         if (type)
2175                 goto out_err;
2176 #endif
2177
2178         rc = s390_enable_sie();
2179         if (rc)
2180                 goto out_err;
2181
2182         rc = -ENOMEM;
2183
2184         if (!sclp.has_64bscao)
2185                 alloc_flags |= GFP_DMA;
2186         rwlock_init(&kvm->arch.sca_lock);
2187         /* start with basic SCA */
2188         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2189         if (!kvm->arch.sca)
2190                 goto out_err;
2191         spin_lock(&kvm_lock);
2192         sca_offset += 16;
2193         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2194                 sca_offset = 0;
2195         kvm->arch.sca = (struct bsca_block *)
2196                         ((char *) kvm->arch.sca + sca_offset);
2197         spin_unlock(&kvm_lock);
2198
2199         sprintf(debug_name, "kvm-%u", current->pid);
2200
2201         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2202         if (!kvm->arch.dbf)
2203                 goto out_err;
2204
2205         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2206         kvm->arch.sie_page2 =
2207              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2208         if (!kvm->arch.sie_page2)
2209                 goto out_err;
2210
2211         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2212
2213         for (i = 0; i < kvm_s390_fac_size(); i++) {
2214                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2215                                               (kvm_s390_fac_base[i] |
2216                                                kvm_s390_fac_ext[i]);
2217                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2218                                               kvm_s390_fac_base[i];
2219         }
2220
2221         /* we are always in czam mode - even on pre z14 machines */
2222         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2223         set_kvm_facility(kvm->arch.model.fac_list, 138);
2224         /* we emulate STHYI in kvm */
2225         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2226         set_kvm_facility(kvm->arch.model.fac_list, 74);
2227         if (MACHINE_HAS_TLB_GUEST) {
2228                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2229                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2230         }
2231
2232         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2233         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2234
2235         kvm_s390_crypto_init(kvm);
2236
2237         mutex_init(&kvm->arch.float_int.ais_lock);
2238         spin_lock_init(&kvm->arch.float_int.lock);
2239         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2240                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2241         init_waitqueue_head(&kvm->arch.ipte_wq);
2242         mutex_init(&kvm->arch.ipte_mutex);
2243
2244         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2245         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2246
2247         if (type & KVM_VM_S390_UCONTROL) {
2248                 kvm->arch.gmap = NULL;
2249                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2250         } else {
2251                 if (sclp.hamax == U64_MAX)
2252                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2253                 else
2254                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2255                                                     sclp.hamax + 1);
2256                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2257                 if (!kvm->arch.gmap)
2258                         goto out_err;
2259                 kvm->arch.gmap->private = kvm;
2260                 kvm->arch.gmap->pfault_enabled = 0;
2261         }
2262
2263         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2264         kvm->arch.use_skf = sclp.has_skey;
2265         spin_lock_init(&kvm->arch.start_stop_lock);
2266         kvm_s390_vsie_init(kvm);
2267         kvm_s390_gisa_init(kvm);
2268         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2269
2270         return 0;
2271 out_err:
2272         free_page((unsigned long)kvm->arch.sie_page2);
2273         debug_unregister(kvm->arch.dbf);
2274         sca_dispose(kvm);
2275         KVM_EVENT(3, "creation of vm failed: %d", rc);
2276         return rc;
2277 }
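/*
 * Illustration added for clarity, not part of the original source: the
 * userspace call that lands in kvm_arch_init_vm() above.  On s390 the
 * type argument is 0 for a regular VM or KVM_VM_S390_UCONTROL for a
 * user-controlled VM (CAP_SYS_ADMIN plus CONFIG_KVM_S390_UCONTROL),
 * matching the checks at the top of the function.
 */
#if 0
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_s390_vm(void)
{
	int kvm_fd = open("/dev/kvm", O_RDWR);

	if (kvm_fd < 0)
		return -1;
	return ioctl(kvm_fd, KVM_CREATE_VM, 0);	/* regular VM */
}
#endif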
2278
2279 bool kvm_arch_has_vcpu_debugfs(void)
2280 {
2281         return false;
2282 }
2283
2284 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2285 {
2286         return 0;
2287 }
2288
2289 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2290 {
2291         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2292         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2293         kvm_s390_clear_local_irqs(vcpu);
2294         kvm_clear_async_pf_completion_queue(vcpu);
2295         if (!kvm_is_ucontrol(vcpu->kvm))
2296                 sca_del_vcpu(vcpu);
2297
2298         if (kvm_is_ucontrol(vcpu->kvm))
2299                 gmap_remove(vcpu->arch.gmap);
2300
2301         if (vcpu->kvm->arch.use_cmma)
2302                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2303         free_page((unsigned long)(vcpu->arch.sie_block));
2304
2305         kvm_vcpu_uninit(vcpu);
2306         kmem_cache_free(kvm_vcpu_cache, vcpu);
2307 }
2308
2309 static void kvm_free_vcpus(struct kvm *kvm)
2310 {
2311         unsigned int i;
2312         struct kvm_vcpu *vcpu;
2313
2314         kvm_for_each_vcpu(i, vcpu, kvm)
2315                 kvm_arch_vcpu_destroy(vcpu);
2316
2317         mutex_lock(&kvm->lock);
2318         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2319                 kvm->vcpus[i] = NULL;
2320
2321         atomic_set(&kvm->online_vcpus, 0);
2322         mutex_unlock(&kvm->lock);
2323 }
2324
2325 void kvm_arch_destroy_vm(struct kvm *kvm)
2326 {
2327         kvm_free_vcpus(kvm);
2328         sca_dispose(kvm);
2329         debug_unregister(kvm->arch.dbf);
2330         kvm_s390_gisa_destroy(kvm);
2331         free_page((unsigned long)kvm->arch.sie_page2);
2332         if (!kvm_is_ucontrol(kvm))
2333                 gmap_remove(kvm->arch.gmap);
2334         kvm_s390_destroy_adapters(kvm);
2335         kvm_s390_clear_float_irqs(kvm);
2336         kvm_s390_vsie_destroy(kvm);
2337         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2338 }
2339
2340 /* Section: vcpu related */
2341 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2342 {
2343         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2344         if (!vcpu->arch.gmap)
2345                 return -ENOMEM;
2346         vcpu->arch.gmap->private = vcpu->kvm;
2347
2348         return 0;
2349 }
2350
2351 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2352 {
2353         if (!kvm_s390_use_sca_entries())
2354                 return;
2355         read_lock(&vcpu->kvm->arch.sca_lock);
2356         if (vcpu->kvm->arch.use_esca) {
2357                 struct esca_block *sca = vcpu->kvm->arch.sca;
2358
2359                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2360                 sca->cpu[vcpu->vcpu_id].sda = 0;
2361         } else {
2362                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2363
2364                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2365                 sca->cpu[vcpu->vcpu_id].sda = 0;
2366         }
2367         read_unlock(&vcpu->kvm->arch.sca_lock);
2368 }
2369
2370 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2371 {
2372         if (!kvm_s390_use_sca_entries()) {
2373                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2374
2375                 /* we still need the basic sca for the ipte control */
2376                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2377                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2378                 return;
2379         }
2380         read_lock(&vcpu->kvm->arch.sca_lock);
2381         if (vcpu->kvm->arch.use_esca) {
2382                 struct esca_block *sca = vcpu->kvm->arch.sca;
2383
2384                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2385                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2386                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2387                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2388                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2389         } else {
2390                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2391
2392                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2393                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2394                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2395                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2396         }
2397         read_unlock(&vcpu->kvm->arch.sca_lock);
2398 }
2399
2400 /* Basic SCA to Extended SCA data copy routines */
2401 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2402 {
2403         d->sda = s->sda;
2404         d->sigp_ctrl.c = s->sigp_ctrl.c;
2405         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2406 }
2407
2408 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2409 {
2410         int i;
2411
2412         d->ipte_control = s->ipte_control;
2413         d->mcn[0] = s->mcn;
2414         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2415                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2416 }
2417
2418 static int sca_switch_to_extended(struct kvm *kvm)
2419 {
2420         struct bsca_block *old_sca = kvm->arch.sca;
2421         struct esca_block *new_sca;
2422         struct kvm_vcpu *vcpu;
2423         unsigned int vcpu_idx;
2424         u32 scaol, scaoh;
2425
2426         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2427         if (!new_sca)
2428                 return -ENOMEM;
2429
2430         scaoh = (u32)((u64)(new_sca) >> 32);
2431         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2432
2433         kvm_s390_vcpu_block_all(kvm);
2434         write_lock(&kvm->arch.sca_lock);
2435
2436         sca_copy_b_to_e(new_sca, old_sca);
2437
2438         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2439                 vcpu->arch.sie_block->scaoh = scaoh;
2440                 vcpu->arch.sie_block->scaol = scaol;
2441                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2442         }
2443         kvm->arch.sca = new_sca;
2444         kvm->arch.use_esca = 1;
2445
2446         write_unlock(&kvm->arch.sca_lock);
2447         kvm_s390_vcpu_unblock_all(kvm);
2448
2449         free_page((unsigned long)old_sca);
2450
2451         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2452                  old_sca, kvm->arch.sca);
2453         return 0;
2454 }
2455
2456 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2457 {
2458         int rc;
2459
2460         if (!kvm_s390_use_sca_entries()) {
2461                 if (id < KVM_MAX_VCPUS)
2462                         return true;
2463                 return false;
2464         }
2465         if (id < KVM_S390_BSCA_CPU_SLOTS)
2466                 return true;
2467         if (!sclp.has_esca || !sclp.has_64bscao)
2468                 return false;
2469
2470         mutex_lock(&kvm->lock);
2471         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2472         mutex_unlock(&kvm->lock);
2473
2474         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2475 }
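/*
 * Note added for clarity: a basic SCA offers KVM_S390_BSCA_CPU_SLOTS
 * entries, so the first vcpu id at or beyond that limit triggers the
 * one-way switch to the extended SCA above, provided the machine offers
 * ESCA and the 64-bit SCA origin (sclp.has_esca, sclp.has_64bscao).
 */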
2476
2477 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2478 {
2479         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2480         kvm_clear_async_pf_completion_queue(vcpu);
2481         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2482                                     KVM_SYNC_GPRS |
2483                                     KVM_SYNC_ACRS |
2484                                     KVM_SYNC_CRS |
2485                                     KVM_SYNC_ARCH0 |
2486                                     KVM_SYNC_PFAULT;
2487         kvm_s390_set_prefix(vcpu, 0);
2488         if (test_kvm_facility(vcpu->kvm, 64))
2489                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2490         if (test_kvm_facility(vcpu->kvm, 82))
2491                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2492         if (test_kvm_facility(vcpu->kvm, 133))
2493                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2494         if (test_kvm_facility(vcpu->kvm, 156))
2495                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2496         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2497          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2498          */
2499         if (MACHINE_HAS_VX)
2500                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2501         else
2502                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2503
2504         if (kvm_is_ucontrol(vcpu->kvm))
2505                 return __kvm_ucontrol_vcpu_init(vcpu);
2506
2507         return 0;
2508 }
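/*
 * Illustration added for clarity, not part of the original source: the
 * kvm_valid_regs bits set above tell userspace which parts of the
 * mmap'ed kvm_run->s.regs area the kernel keeps in sync; userspace
 * announces its own changes via kvm_dirty_regs.  A hedged sketch with a
 * made-up helper:
 */
#if 0
#include <linux/kvm.h>

static void set_guest_prefix(struct kvm_run *run, __u64 prefix)
{
	if (run->kvm_valid_regs & KVM_SYNC_PREFIX) {
		run->s.regs.prefix = prefix;
		/* make the next KVM_RUN pick up the new value */
		run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
	}
}
#endif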
2509
2510 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2511 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2512 {
2513         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2514         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2515         vcpu->arch.cputm_start = get_tod_clock_fast();
2516         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2517 }
2518
2519 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2520 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2521 {
2522         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2523         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2524         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2525         vcpu->arch.cputm_start = 0;
2526         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2527 }
2528
2529 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2530 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2531 {
2532         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2533         vcpu->arch.cputm_enabled = true;
2534         __start_cpu_timer_accounting(vcpu);
2535 }
2536
2537 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2538 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2539 {
2540         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2541         __stop_cpu_timer_accounting(vcpu);
2542         vcpu->arch.cputm_enabled = false;
2543 }
2544
2545 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2546 {
2547         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2548         __enable_cpu_timer_accounting(vcpu);
2549         preempt_enable();
2550 }
2551
2552 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2553 {
2554         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2555         __disable_cpu_timer_accounting(vcpu);
2556         preempt_enable();
2557 }
2558
2559 /* set the cpu timer - may only be called from the VCPU thread itself */
2560 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2561 {
2562         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2563         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2564         if (vcpu->arch.cputm_enabled)
2565                 vcpu->arch.cputm_start = get_tod_clock_fast();
2566         vcpu->arch.sie_block->cputm = cputm;
2567         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2568         preempt_enable();
2569 }
2570
2571 /* update and get the cpu timer - can also be called from other VCPU threads */
2572 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2573 {
2574         unsigned int seq;
2575         __u64 value;
2576
2577         if (unlikely(!vcpu->arch.cputm_enabled))
2578                 return vcpu->arch.sie_block->cputm;
2579
2580         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2581         do {
2582                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2583                 /*
2584                  * If the writer would ever execute a read in the critical
2585                  * section, e.g. in irq context, we have a deadlock.
2586                  */
2587                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2588                 value = vcpu->arch.sie_block->cputm;
2589                 /* if cputm_start is 0, accounting is being started/stopped */
2590                 if (likely(vcpu->arch.cputm_start))
2591                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2592         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2593         preempt_enable();
2594         return value;
2595 }
2596
2597 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2598 {
2600         gmap_enable(vcpu->arch.enabled_gmap);
2601         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2602         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2603                 __start_cpu_timer_accounting(vcpu);
2604         vcpu->cpu = cpu;
2605 }
2606
2607 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2608 {
2609         vcpu->cpu = -1;
2610         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2611                 __stop_cpu_timer_accounting(vcpu);
2612         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2613         vcpu->arch.enabled_gmap = gmap_get_enabled();
2614         gmap_disable(vcpu->arch.enabled_gmap);
2616 }
2617
2618 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2619 {
2620         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2621         vcpu->arch.sie_block->gpsw.mask = 0UL;
2622         vcpu->arch.sie_block->gpsw.addr = 0UL;
2623         kvm_s390_set_prefix(vcpu, 0);
2624         kvm_s390_set_cpu_timer(vcpu, 0);
2625         vcpu->arch.sie_block->ckc       = 0UL;
2626         vcpu->arch.sie_block->todpr     = 0;
2627         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2628         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2629                                         CR0_INTERRUPT_KEY_SUBMASK |
2630                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2631         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2632                                         CR14_UNUSED_33 |
2633                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2634         /* make sure the new fpc will be lazily loaded */
2635         save_fpu_regs();
2636         current->thread.fpu.fpc = 0;
2637         vcpu->arch.sie_block->gbea = 1;
2638         vcpu->arch.sie_block->pp = 0;
2639         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2640         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2641         kvm_clear_async_pf_completion_queue(vcpu);
2642         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2643                 kvm_s390_vcpu_stop(vcpu);
2644         kvm_s390_clear_local_irqs(vcpu);
2645 }
2646
2647 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2648 {
2649         mutex_lock(&vcpu->kvm->lock);
2650         preempt_disable();
2651         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2652         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2653         preempt_enable();
2654         mutex_unlock(&vcpu->kvm->lock);
2655         if (!kvm_is_ucontrol(vcpu->kvm)) {
2656                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2657                 sca_add_vcpu(vcpu);
2658         }
2659         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2660                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2661         /* make vcpu_load load the right gmap on the first trigger */
2662         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2663 }
2664
2665 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2666 {
2667         /*
2668          * If the AP instructions are not being interpreted and the MSAX3
2669          * facility is not configured for the guest, there is nothing to set up.
2670          */
2671         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2672                 return;
2673
2674         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2675         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2676         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2677
2678         if (vcpu->kvm->arch.crypto.apie)
2679                 vcpu->arch.sie_block->eca |= ECA_APIE;
2680
2681         /* Set up protected key support */
2682         if (vcpu->kvm->arch.crypto.aes_kw)
2683                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2684         if (vcpu->kvm->arch.crypto.dea_kw)
2685                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2686 }
2687
2688 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2689 {
2690         free_page(vcpu->arch.sie_block->cbrlo);
2691         vcpu->arch.sie_block->cbrlo = 0;
2692 }
2693
2694 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2695 {
2696         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2697         if (!vcpu->arch.sie_block->cbrlo)
2698                 return -ENOMEM;
2699         return 0;
2700 }
2701
2702 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2703 {
2704         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2705
2706         vcpu->arch.sie_block->ibc = model->ibc;
2707         if (test_kvm_facility(vcpu->kvm, 7))
2708                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2709 }
2710
2711 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2712 {
2713         int rc = 0;
2714
2715         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2716                                                     CPUSTAT_SM |
2717                                                     CPUSTAT_STOPPED);
2718
2719         if (test_kvm_facility(vcpu->kvm, 78))
2720                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2721         else if (test_kvm_facility(vcpu->kvm, 8))
2722                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2723
2724         kvm_s390_vcpu_setup_model(vcpu);
2725
2726         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2727         if (MACHINE_HAS_ESOP)
2728                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2729         if (test_kvm_facility(vcpu->kvm, 9))
2730                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2731         if (test_kvm_facility(vcpu->kvm, 73))
2732                 vcpu->arch.sie_block->ecb |= ECB_TE;
2733
2734         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2735                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2736         if (test_kvm_facility(vcpu->kvm, 130))
2737                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2738         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2739         if (sclp.has_cei)
2740                 vcpu->arch.sie_block->eca |= ECA_CEI;
2741         if (sclp.has_ib)
2742                 vcpu->arch.sie_block->eca |= ECA_IB;
2743         if (sclp.has_siif)
2744                 vcpu->arch.sie_block->eca |= ECA_SII;
2745         if (sclp.has_sigpif)
2746                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2747         if (test_kvm_facility(vcpu->kvm, 129)) {
2748                 vcpu->arch.sie_block->eca |= ECA_VX;
2749                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2750         }
2751         if (test_kvm_facility(vcpu->kvm, 139))
2752                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2753         if (test_kvm_facility(vcpu->kvm, 156))
2754                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2755         if (vcpu->arch.sie_block->gd) {
2756                 vcpu->arch.sie_block->eca |= ECA_AIV;
2757                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2758                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2759         }
2760         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2761                                         | SDNXC;
2762         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2763
2764         if (sclp.has_kss)
2765                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2766         else
2767                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2768
2769         if (vcpu->kvm->arch.use_cmma) {
2770                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2771                 if (rc)
2772                         return rc;
2773         }
2774         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2775         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2776
2777         vcpu->arch.sie_block->hpid = HPID_KVM;
2778
2779         kvm_s390_vcpu_crypto_setup(vcpu);
2780
2781         return rc;
2782 }
2783
2784 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2785                                       unsigned int id)
2786 {
2787         struct kvm_vcpu *vcpu;
2788         struct sie_page *sie_page;
2789         int rc = -EINVAL;
2790
2791         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2792                 goto out;
2793
2794         rc = -ENOMEM;
2795
2796         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2797         if (!vcpu)
2798                 goto out;
2799
2800         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2801         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2802         if (!sie_page)
2803                 goto out_free_cpu;
2804
2805         vcpu->arch.sie_block = &sie_page->sie_block;
2806         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2807
2808         /* the real guest size will always be smaller than msl */
2809         vcpu->arch.sie_block->mso = 0;
2810         vcpu->arch.sie_block->msl = sclp.hamax;
2811
2812         vcpu->arch.sie_block->icpua = id;
2813         spin_lock_init(&vcpu->arch.local_int.lock);
2814         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2815         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2816                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2817         seqcount_init(&vcpu->arch.cputm_seqcount);
2818
2819         rc = kvm_vcpu_init(vcpu, kvm, id);
2820         if (rc)
2821                 goto out_free_sie_block;
2822         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2823                  vcpu->arch.sie_block);
2824         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2825
2826         return vcpu;
2827 out_free_sie_block:
2828         free_page((unsigned long)(vcpu->arch.sie_block));
2829 out_free_cpu:
2830         kmem_cache_free(kvm_vcpu_cache, vcpu);
2831 out:
2832         return ERR_PTR(rc);
2833 }
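
/*
 * A minimal userspace sketch (not part of this file) of how the VCPU created
 * above is typically obtained and its kvm_run area mapped; error checking is
 * elided, and "kvm_fd"/"vm_fd" are assumed to come from opening /dev/kvm and
 * KVM_CREATE_VM:
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/mman.h>
 *	#include <linux/kvm.h>
 *
 *	int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
 *	int run_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
 *	struct kvm_run *run = mmap(NULL, run_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 */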
2834
2835 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2836 {
2837         return kvm_s390_vcpu_has_irq(vcpu, 0);
2838 }
2839
2840 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2841 {
2842         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2843 }
2844
2845 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2846 {
2847         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2848         exit_sie(vcpu);
2849 }
2850
2851 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2852 {
2853         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2854 }
2855
2856 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2857 {
2858         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2859         exit_sie(vcpu);
2860 }
2861
2862 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
2863 {
2864         return atomic_read(&vcpu->arch.sie_block->prog20) &
2865                (PROG_BLOCK_SIE | PROG_REQUEST);
2866 }
2867
2868 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2869 {
2870         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2871 }
2872
2873 /*
2874  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
2875  * If the CPU is not running (e.g. waiting while idle) the function will
2876  * return immediately.
 */
2877 void exit_sie(struct kvm_vcpu *vcpu)
2878 {
2879         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2880         kvm_s390_vsie_kick(vcpu);
2881         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2882                 cpu_relax();
2883 }
2884
2885 /* Kick a guest cpu out of SIE to process a request synchronously */
2886 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2887 {
2888         kvm_make_request(req, vcpu);
2889         kvm_s390_vcpu_request(vcpu);
2890 }
2891
2892 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2893                               unsigned long end)
2894 {
2895         struct kvm *kvm = gmap->private;
2896         struct kvm_vcpu *vcpu;
2897         unsigned long prefix;
2898         int i;
2899
2900         if (gmap_is_shadow(gmap))
2901                 return;
2902         if (start >= 1UL << 31)
2903                 /* We are only interested in prefix pages */
2904                 return;
2905         kvm_for_each_vcpu(i, vcpu, kvm) {
2906                 /* match against both prefix pages */
2907                 prefix = kvm_s390_get_prefix(vcpu);
2908                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2909                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2910                                    start, end);
2911                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2912                 }
2913         }
2914 }
2915
2916 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2917 {
2918         /* kvm common code refers to this, but never calls it */
2919         BUG();
2920         return 0;
2921 }
2922
2923 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2924                                            struct kvm_one_reg *reg)
2925 {
2926         int r = -EINVAL;
2927
2928         switch (reg->id) {
2929         case KVM_REG_S390_TODPR:
2930                 r = put_user(vcpu->arch.sie_block->todpr,
2931                              (u32 __user *)reg->addr);
2932                 break;
2933         case KVM_REG_S390_EPOCHDIFF:
2934                 r = put_user(vcpu->arch.sie_block->epoch,
2935                              (u64 __user *)reg->addr);
2936                 break;
2937         case KVM_REG_S390_CPU_TIMER:
2938                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2939                              (u64 __user *)reg->addr);
2940                 break;
2941         case KVM_REG_S390_CLOCK_COMP:
2942                 r = put_user(vcpu->arch.sie_block->ckc,
2943                              (u64 __user *)reg->addr);
2944                 break;
2945         case KVM_REG_S390_PFTOKEN:
2946                 r = put_user(vcpu->arch.pfault_token,
2947                              (u64 __user *)reg->addr);
2948                 break;
2949         case KVM_REG_S390_PFCOMPARE:
2950                 r = put_user(vcpu->arch.pfault_compare,
2951                              (u64 __user *)reg->addr);
2952                 break;
2953         case KVM_REG_S390_PFSELECT:
2954                 r = put_user(vcpu->arch.pfault_select,
2955                              (u64 __user *)reg->addr);
2956                 break;
2957         case KVM_REG_S390_PP:
2958                 r = put_user(vcpu->arch.sie_block->pp,
2959                              (u64 __user *)reg->addr);
2960                 break;
2961         case KVM_REG_S390_GBEA:
2962                 r = put_user(vcpu->arch.sie_block->gbea,
2963                              (u64 __user *)reg->addr);
2964                 break;
2965         default:
2966                 break;
2967         }
2968
2969         return r;
2970 }
2971
2972 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2973                                            struct kvm_one_reg *reg)
2974 {
2975         int r = -EINVAL;
2976         __u64 val;
2977
2978         switch (reg->id) {
2979         case KVM_REG_S390_TODPR:
2980                 r = get_user(vcpu->arch.sie_block->todpr,
2981                              (u32 __user *)reg->addr);
2982                 break;
2983         case KVM_REG_S390_EPOCHDIFF:
2984                 r = get_user(vcpu->arch.sie_block->epoch,
2985                              (u64 __user *)reg->addr);
2986                 break;
2987         case KVM_REG_S390_CPU_TIMER:
2988                 r = get_user(val, (u64 __user *)reg->addr);
2989                 if (!r)
2990                         kvm_s390_set_cpu_timer(vcpu, val);
2991                 break;
2992         case KVM_REG_S390_CLOCK_COMP:
2993                 r = get_user(vcpu->arch.sie_block->ckc,
2994                              (u64 __user *)reg->addr);
2995                 break;
2996         case KVM_REG_S390_PFTOKEN:
2997                 r = get_user(vcpu->arch.pfault_token,
2998                              (u64 __user *)reg->addr);
2999                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3000                         kvm_clear_async_pf_completion_queue(vcpu);
3001                 break;
3002         case KVM_REG_S390_PFCOMPARE:
3003                 r = get_user(vcpu->arch.pfault_compare,
3004                              (u64 __user *)reg->addr);
3005                 break;
3006         case KVM_REG_S390_PFSELECT:
3007                 r = get_user(vcpu->arch.pfault_select,
3008                              (u64 __user *)reg->addr);
3009                 break;
3010         case KVM_REG_S390_PP:
3011                 r = get_user(vcpu->arch.sie_block->pp,
3012                              (u64 __user *)reg->addr);
3013                 break;
3014         case KVM_REG_S390_GBEA:
3015                 r = get_user(vcpu->arch.sie_block->gbea,
3016                              (u64 __user *)reg->addr);
3017                 break;
3018         default:
3019                 break;
3020         }
3021
3022         return r;
3023 }
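
/*
 * A minimal userspace sketch (not part of this file) of the one_reg
 * interface handled above: reading the CPU timer, with "vcpu_fd" assumed
 * to be a VCPU file descriptor and error checking elided:
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&cputm,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */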
3024
3025 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3026 {
3027         kvm_s390_vcpu_initial_reset(vcpu);
3028         return 0;
3029 }
3030
3031 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3032 {
3033         vcpu_load(vcpu);
3034         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3035         vcpu_put(vcpu);
3036         return 0;
3037 }
3038
3039 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3040 {
3041         vcpu_load(vcpu);
3042         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3043         vcpu_put(vcpu);
3044         return 0;
3045 }
3046
3047 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3048                                   struct kvm_sregs *sregs)
3049 {
3050         vcpu_load(vcpu);
3051
3052         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3053         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3054
3055         vcpu_put(vcpu);
3056         return 0;
3057 }
3058
3059 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3060                                   struct kvm_sregs *sregs)
3061 {
3062         vcpu_load(vcpu);
3063
3064         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3065         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3066
3067         vcpu_put(vcpu);
3068         return 0;
3069 }
3070
3071 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3072 {
3073         int ret = 0;
3074
3075         vcpu_load(vcpu);
3076
3077         if (test_fp_ctl(fpu->fpc)) {
3078                 ret = -EINVAL;
3079                 goto out;
3080         }
3081         vcpu->run->s.regs.fpc = fpu->fpc;
3082         if (MACHINE_HAS_VX)
3083                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3084                                  (freg_t *) fpu->fprs);
3085         else
3086                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3087
3088 out:
3089         vcpu_put(vcpu);
3090         return ret;
3091 }
3092
3093 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3094 {
3095         vcpu_load(vcpu);
3096
3097         /* make sure we have the latest values */
3098         save_fpu_regs();
3099         if (MACHINE_HAS_VX)
3100                 convert_vx_to_fp((freg_t *) fpu->fprs,
3101                                  (__vector128 *) vcpu->run->s.regs.vrs);
3102         else
3103                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3104         fpu->fpc = vcpu->run->s.regs.fpc;
3105
3106         vcpu_put(vcpu);
3107         return 0;
3108 }
3109
3110 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3111 {
3112         int rc = 0;
3113
3114         if (!is_vcpu_stopped(vcpu))
3115                 rc = -EBUSY;
3116         else {
3117                 vcpu->run->psw_mask = psw.mask;
3118                 vcpu->run->psw_addr = psw.addr;
3119         }
3120         return rc;
3121 }
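
/*
 * A minimal userspace sketch (not part of this file): setting the initial
 * PSW of a stopped VCPU. The mask value is illustrative only; it sets the
 * EA and BA bits for a 64-bit guest entered at address 0x10000:
 *
 *	struct kvm_s390_psw psw = {
 *		.mask = 0x0000000180000000UL,
 *		.addr = 0x10000,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_SET_INITIAL_PSW, &psw);
 */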
3122
3123 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3124                                   struct kvm_translation *tr)
3125 {
3126         return -EINVAL; /* not implemented yet */
3127 }
3128
3129 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3130                               KVM_GUESTDBG_USE_HW_BP | \
3131                               KVM_GUESTDBG_ENABLE)
3132
3133 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3134                                         struct kvm_guest_debug *dbg)
3135 {
3136         int rc = 0;
3137
3138         vcpu_load(vcpu);
3139
3140         vcpu->guest_debug = 0;
3141         kvm_s390_clear_bp_data(vcpu);
3142
3143         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3144                 rc = -EINVAL;
3145                 goto out;
3146         }
3147         if (!sclp.has_gpere) {
3148                 rc = -EINVAL;
3149                 goto out;
3150         }
3151
3152         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3153                 vcpu->guest_debug = dbg->control;
3154                 /* enforce guest PER */
3155                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3156
3157                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3158                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3159         } else {
3160                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3161                 vcpu->arch.guestdbg.last_bp = 0;
3162         }
3163
3164         if (rc) {
3165                 vcpu->guest_debug = 0;
3166                 kvm_s390_clear_bp_data(vcpu);
3167                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3168         }
3169
3170 out:
3171         vcpu_put(vcpu);
3172         return rc;
3173 }
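
/*
 * A minimal userspace sketch (not part of this file): enabling single-step
 * debugging with flags accepted by VALID_GUESTDBG_FLAGS above; error
 * checking is elided:
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */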
3174
3175 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3176                                     struct kvm_mp_state *mp_state)
3177 {
3178         int ret;
3179
3180         vcpu_load(vcpu);
3181
3182         /* CHECK_STOP and LOAD are not supported yet */
3183         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3184                                       KVM_MP_STATE_OPERATING;
3185
3186         vcpu_put(vcpu);
3187         return ret;
3188 }
3189
3190 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3191                                     struct kvm_mp_state *mp_state)
3192 {
3193         int rc = 0;
3194
3195         vcpu_load(vcpu);
3196
3197         /* user space knows about this interface - let it control the state */
3198         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3199
3200         switch (mp_state->mp_state) {
3201         case KVM_MP_STATE_STOPPED:
3202                 kvm_s390_vcpu_stop(vcpu);
3203                 break;
3204         case KVM_MP_STATE_OPERATING:
3205                 kvm_s390_vcpu_start(vcpu);
3206                 break;
3207         case KVM_MP_STATE_LOAD:
3208         case KVM_MP_STATE_CHECK_STOP:
3209                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3210         default:
3211                 rc = -ENXIO;
3212         }
3213
3214         vcpu_put(vcpu);
3215         return rc;
3216 }
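
/*
 * A minimal userspace sketch (not part of this file): once userspace has
 * used this interface (user_cpu_state_ctrl above), it controls the
 * stopped/operating state itself, e.g.:
 *
 *	struct kvm_mp_state state = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);
 *	state.mp_state = KVM_MP_STATE_OPERATING;
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);
 */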
3217
3218 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3219 {
3220         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3221 }
3222
3223 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3224 {
3225 retry:
3226         kvm_s390_vcpu_request_handled(vcpu);
3227         if (!kvm_request_pending(vcpu))
3228                 return 0;
3229         /*
3230          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3231          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3232          * This ensures that the ipte instruction for this request has
3233          * already finished. We might race against a second unmapper that
3234          * wants to set the blocking bit. Let's just retry the request loop.
3235          */
3236         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3237                 int rc;
3238                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3239                                           kvm_s390_get_prefix(vcpu),
3240                                           PAGE_SIZE * 2, PROT_WRITE);
3241                 if (rc) {
3242                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3243                         return rc;
3244                 }
3245                 goto retry;
3246         }
3247
3248         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3249                 vcpu->arch.sie_block->ihcpu = 0xffff;
3250                 goto retry;
3251         }
3252
3253         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3254                 if (!ibs_enabled(vcpu)) {
3255                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3256                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3257                 }
3258                 goto retry;
3259         }
3260
3261         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3262                 if (ibs_enabled(vcpu)) {
3263                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3264                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3265                 }
3266                 goto retry;
3267         }
3268
3269         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3270                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3271                 goto retry;
3272         }
3273
3274         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3275                 /*
3276                  * Disable CMM virtualization; we will emulate the ESSA
3277                  * instruction manually, in order to provide additional
3278                  * functionalities needed for live migration.
3279                  */
3280                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3281                 goto retry;
3282         }
3283
3284         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3285                 /*
3286                  * Re-enable CMM virtualization if CMMA is available and
3287                  * CMM has been used.
3288                  */
3289                 if ((vcpu->kvm->arch.use_cmma) &&
3290                     (vcpu->kvm->mm->context.uses_cmm))
3291                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3292                 goto retry;
3293         }
3294
3295         /* nothing to do, just clear the request */
3296         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3297         /* we left the vsie handler, nothing to do, just clear the request */
3298         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3299
3300         return 0;
3301 }
3302
3303 void kvm_s390_set_tod_clock(struct kvm *kvm,
3304                             const struct kvm_s390_vm_tod_clock *gtod)
3305 {
3306         struct kvm_vcpu *vcpu;
3307         struct kvm_s390_tod_clock_ext htod;
3308         int i;
3309
3310         mutex_lock(&kvm->lock);
3311         preempt_disable();
3312
3313         get_tod_clock_ext((char *)&htod);
3314
3315         kvm->arch.epoch = gtod->tod - htod.tod;
3316         kvm->arch.epdx = 0;
3317         if (test_kvm_facility(kvm, 139)) {
3318                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3319                 if (kvm->arch.epoch > gtod->tod)
3320                         kvm->arch.epdx -= 1;
3321         }
3322
3323         kvm_s390_vcpu_block_all(kvm);
3324         kvm_for_each_vcpu(i, vcpu, kvm) {
3325                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3326                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3327         }
3328
3329         kvm_s390_vcpu_unblock_all(kvm);
3330         preempt_enable();
3331         mutex_unlock(&kvm->lock);
3332 }
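
/*
 * Worked example for the borrow in kvm_s390_set_tod_clock() above, assuming
 * facility 139 (multiple epochs): the guest TOD is the 72-bit pair
 * (epoch_idx:tod). When the 64-bit subtraction gtod->tod - htod.tod wraps,
 * kvm->arch.epoch ends up larger than gtod->tod, so one is borrowed from
 * the epoch index, exactly as in multi-word subtraction:
 *
 *	htod  = (0x01, 0xf000000000000000)
 *	gtod  = (0x02, 0x1000000000000000)
 *	epoch = 0x1000000000000000 - 0xf000000000000000
 *	      = 0x2000000000000000  (wrapped, epoch > gtod->tod)
 *	epdx  = (0x02 - 0x01) - 1 = 0x00
 */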
3333
3334 /**
3335  * kvm_arch_fault_in_page - fault-in guest page if necessary
3336  * @vcpu: The corresponding virtual cpu
3337  * @gpa: Guest physical address
3338  * @writable: Whether the page should be writable or not
3339  *
3340  * Make sure that a guest page has been faulted-in on the host.
3341  *
3342  * Return: Zero on success, negative error code otherwise.
3343  */
3344 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3345 {
3346         return gmap_fault(vcpu->arch.gmap, gpa,
3347                           writable ? FAULT_FLAG_WRITE : 0);
3348 }
3349
3350 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3351                                       unsigned long token)
3352 {
3353         struct kvm_s390_interrupt inti;
3354         struct kvm_s390_irq irq;
3355
3356         if (start_token) {
3357                 irq.u.ext.ext_params2 = token;
3358                 irq.type = KVM_S390_INT_PFAULT_INIT;
3359                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3360         } else {
3361                 inti.type = KVM_S390_INT_PFAULT_DONE;
3362                 inti.parm64 = token;
3363                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3364         }
3365 }
3366
3367 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3368                                      struct kvm_async_pf *work)
3369 {
3370         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3371         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3372 }
3373
3374 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3375                                  struct kvm_async_pf *work)
3376 {
3377         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3378         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3379 }
3380
3381 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3382                                struct kvm_async_pf *work)
3383 {
3384         /* s390 will always inject the page directly */
3385 }
3386
3387 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3388 {
3389         /*
3390          * s390 will always inject the page directly,
3391          * but we still want check_async_completion to clean up.
3392          */
3393         return true;
3394 }
3395
3396 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3397 {
3398         hva_t hva;
3399         struct kvm_arch_async_pf arch;
3400         int rc;
3401
3402         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3403                 return 0;
3404         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3405             vcpu->arch.pfault_compare)
3406                 return 0;
3407         if (psw_extint_disabled(vcpu))
3408                 return 0;
3409         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3410                 return 0;
3411         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3412                 return 0;
3413         if (!vcpu->arch.gmap->pfault_enabled)
3414                 return 0;
3415
3416         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3417         hva += current->thread.gmap_addr & ~PAGE_MASK;
3418         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3419                 return 0;
3420
3421         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3422         return rc;
3423 }
3424
3425 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3426 {
3427         int rc, cpuflags;
3428
3429         /*
3430          * On s390, notifications for arriving pages will be delivered directly
3431          * to the guest, but the housekeeping for completed pfaults is
3432          * handled outside the worker.
3433          */
3434         kvm_check_async_pf_completion(vcpu);
3435
3436         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3437         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3438
3439         if (need_resched())
3440                 schedule();
3441
3442         if (test_cpu_flag(CIF_MCCK_PENDING))
3443                 s390_handle_mcck();
3444
3445         if (!kvm_is_ucontrol(vcpu->kvm)) {
3446                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3447                 if (rc)
3448                         return rc;
3449         }
3450
3451         rc = kvm_s390_handle_requests(vcpu);
3452         if (rc)
3453                 return rc;
3454
3455         if (guestdbg_enabled(vcpu)) {
3456                 kvm_s390_backup_guest_per_regs(vcpu);
3457                 kvm_s390_patch_guest_per_regs(vcpu);
3458         }
3459
3460         vcpu->arch.sie_block->icptcode = 0;
3461         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3462         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3463         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3464
3465         return 0;
3466 }
3467
3468 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3469 {
3470         struct kvm_s390_pgm_info pgm_info = {
3471                 .code = PGM_ADDRESSING,
3472         };
3473         u8 opcode, ilen;
3474         int rc;
3475
3476         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3477         trace_kvm_s390_sie_fault(vcpu);
3478
3479         /*
3480          * We want to inject an addressing exception, which is defined as a
3481          * suppressing or terminating exception. However, since we came here
3482          * by a DAT access exception, the PSW still points to the faulting
3483          * instruction since DAT exceptions are nullifying. So we've got
3484          * to look up the current opcode to get the length of the instruction
3485          * to be able to forward the PSW.
3486          */
3487         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3488         ilen = insn_length(opcode);
3489         if (rc < 0) {
3490                 return rc;
3491         } else if (rc) {
3492                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3493                  * Forward by arbitrary ilc, injection will take care of
3494                  * nullification if necessary.
3495                  */
3496                 pgm_info = vcpu->arch.pgm;
3497                 ilen = 4;
3498         }
3499         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3500         kvm_s390_forward_psw(vcpu, ilen);
3501         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3502 }
3503
3504 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3505 {
3506         struct mcck_volatile_info *mcck_info;
3507         struct sie_page *sie_page;
3508
3509         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3510                    vcpu->arch.sie_block->icptcode);
3511         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3512
3513         if (guestdbg_enabled(vcpu))
3514                 kvm_s390_restore_guest_per_regs(vcpu);
3515
3516         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3517         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3518
3519         if (exit_reason == -EINTR) {
3520                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3521                 sie_page = container_of(vcpu->arch.sie_block,
3522                                         struct sie_page, sie_block);
3523                 mcck_info = &sie_page->mcck_info;
3524                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3525                 return 0;
3526         }
3527
3528         if (vcpu->arch.sie_block->icptcode > 0) {
3529                 int rc = kvm_handle_sie_intercept(vcpu);
3530
3531                 if (rc != -EOPNOTSUPP)
3532                         return rc;
3533                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3534                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3535                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3536                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3537                 return -EREMOTE;
3538         } else if (exit_reason != -EFAULT) {
3539                 vcpu->stat.exit_null++;
3540                 return 0;
3541         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3542                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3543                 vcpu->run->s390_ucontrol.trans_exc_code =
3544                                                 current->thread.gmap_addr;
3545                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3546                 return -EREMOTE;
3547         } else if (current->thread.gmap_pfault) {
3548                 trace_kvm_s390_major_guest_pfault(vcpu);
3549                 current->thread.gmap_pfault = 0;
3550                 if (kvm_arch_setup_async_pf(vcpu))
3551                         return 0;
3552                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3553         }
3554         return vcpu_post_run_fault_in_sie(vcpu);
3555 }
3556
3557 static int __vcpu_run(struct kvm_vcpu *vcpu)
3558 {
3559         int rc, exit_reason;
3560
3561         /*
3562          * We try to hold kvm->srcu during most of vcpu_run (except when
3563          * running the guest), so that memslots (and other stuff) are protected.
3564          */
3565         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3566
3567         do {
3568                 rc = vcpu_pre_run(vcpu);
3569                 if (rc)
3570                         break;
3571
3572                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3573                 /*
3574                  * As PF_VCPU will be used in the fault handler, there must
3575                  * be no uaccess between guest_enter and guest_exit.
3576                  */
3577                 local_irq_disable();
3578                 guest_enter_irqoff();
3579                 __disable_cpu_timer_accounting(vcpu);
3580                 local_irq_enable();
3581                 exit_reason = sie64a(vcpu->arch.sie_block,
3582                                      vcpu->run->s.regs.gprs);
3583                 local_irq_disable();
3584                 __enable_cpu_timer_accounting(vcpu);
3585                 guest_exit_irqoff();
3586                 local_irq_enable();
3587                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3588
3589                 rc = vcpu_post_run(vcpu, exit_reason);
3590         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3591
3592         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3593         return rc;
3594 }
3595
3596 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3597 {
3598         struct runtime_instr_cb *riccb;
3599         struct gs_cb *gscb;
3600
3601         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3602         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3603         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3604         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3605         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3606                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3607         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3608                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3609                 /* some control register changes require a tlb flush */
3610                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3611         }
3612         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3613                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3614                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3615                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3616                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3617                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3618         }
3619         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3620                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3621                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3622                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3623                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3624                         kvm_clear_async_pf_completion_queue(vcpu);
3625         }
3626         /*
3627          * If userspace sets the riccb (e.g. after migration) to a valid state,
3628          * we should enable RI here instead of doing the lazy enablement.
3629          */
3630         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3631             test_kvm_facility(vcpu->kvm, 64) &&
3632             riccb->v &&
3633             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3634                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3635                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3636         }
3637         /*
3638          * If userspace sets the gscb (e.g. after migration) to non-zero,
3639          * we should enable GS here instead of doing the lazy enablement.
3640          */
3641         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3642             test_kvm_facility(vcpu->kvm, 133) &&
3643             gscb->gssm &&
3644             !vcpu->arch.gs_enabled) {
3645                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3646                 vcpu->arch.sie_block->ecb |= ECB_GS;
3647                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3648                 vcpu->arch.gs_enabled = 1;
3649         }
3650         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3651             test_kvm_facility(vcpu->kvm, 82)) {
3652                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3653                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3654         }
3655         save_access_regs(vcpu->arch.host_acrs);
3656         restore_access_regs(vcpu->run->s.regs.acrs);
3657         /* save host (userspace) fprs/vrs */
3658         save_fpu_regs();
3659         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3660         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3661         if (MACHINE_HAS_VX)
3662                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3663         else
3664                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3665         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3666         if (test_fp_ctl(current->thread.fpu.fpc))
3667                 /* User space provided an invalid FPC, let's clear it */
3668                 current->thread.fpu.fpc = 0;
3669         if (MACHINE_HAS_GS) {
3670                 preempt_disable();
3671                 __ctl_set_bit(2, 4);
3672                 if (current->thread.gs_cb) {
3673                         vcpu->arch.host_gscb = current->thread.gs_cb;
3674                         save_gs_cb(vcpu->arch.host_gscb);
3675                 }
3676                 if (vcpu->arch.gs_enabled) {
3677                         current->thread.gs_cb = (struct gs_cb *)
3678                                                 &vcpu->run->s.regs.gscb;
3679                         restore_gs_cb(current->thread.gs_cb);
3680                 }
3681                 preempt_enable();
3682         }
3683         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3684
3685         kvm_run->kvm_dirty_regs = 0;
3686 }
3687
3688 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3689 {
3690         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3691         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3692         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3693         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3694         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3695         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3696         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3697         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3698         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3699         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3700         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3701         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3702         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3703         save_access_regs(vcpu->run->s.regs.acrs);
3704         restore_access_regs(vcpu->arch.host_acrs);
3705         /* Save guest register state */
3706         save_fpu_regs();
3707         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3708         /* Restore will be done lazily at return */
3709         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3710         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3711         if (MACHINE_HAS_GS) {
3712                 __ctl_set_bit(2, 4);
3713                 if (vcpu->arch.gs_enabled)
3714                         save_gs_cb(current->thread.gs_cb);
3715                 preempt_disable();
3716                 current->thread.gs_cb = vcpu->arch.host_gscb;
3717                 restore_gs_cb(vcpu->arch.host_gscb);
3718                 preempt_enable();
3719                 if (!vcpu->arch.host_gscb)
3720                         __ctl_clear_bit(2, 4);
3721                 vcpu->arch.host_gscb = NULL;
3722         }
3723         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3724 }
3725
3726 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3727 {
3728         int rc;
3729
3730         if (kvm_run->immediate_exit)
3731                 return -EINTR;
3732
3733         vcpu_load(vcpu);
3734
3735         if (guestdbg_exit_pending(vcpu)) {
3736                 kvm_s390_prepare_debug_exit(vcpu);
3737                 rc = 0;
3738                 goto out;
3739         }
3740
3741         kvm_sigset_activate(vcpu);
3742
3743         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3744                 kvm_s390_vcpu_start(vcpu);
3745         } else if (is_vcpu_stopped(vcpu)) {
3746                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3747                                    vcpu->vcpu_id);
3748                 rc = -EINVAL;
3749                 goto out;
3750         }
3751
3752         sync_regs(vcpu, kvm_run);
3753         enable_cpu_timer_accounting(vcpu);
3754
3755         might_fault();
3756         rc = __vcpu_run(vcpu);
3757
3758         if (signal_pending(current) && !rc) {
3759                 kvm_run->exit_reason = KVM_EXIT_INTR;
3760                 rc = -EINTR;
3761         }
3762
3763         if (guestdbg_exit_pending(vcpu) && !rc)  {
3764                 kvm_s390_prepare_debug_exit(vcpu);
3765                 rc = 0;
3766         }
3767
3768         if (rc == -EREMOTE) {
3769                 /* userspace support is needed, kvm_run has been prepared */
3770                 rc = 0;
3771         }
3772
3773         disable_cpu_timer_accounting(vcpu);
3774         store_regs(vcpu, kvm_run);
3775
3776         kvm_sigset_deactivate(vcpu);
3777
3778         vcpu->stat.exit_userspace++;
3779 out:
3780         vcpu_put(vcpu);
3781         return rc;
3782 }
3783
3784 /*
3785  * store status at address
3786  * we have two special cases:
3787  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3788  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3789  */
3790 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3791 {
3792         unsigned char archmode = 1;
3793         freg_t fprs[NUM_FPRS];
3794         unsigned int px;
3795         u64 clkcomp, cputm;
3796         int rc;
3797
3798         px = kvm_s390_get_prefix(vcpu);
3799         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3800                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3801                         return -EFAULT;
3802                 gpa = 0;
3803         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3804                 if (write_guest_real(vcpu, 163, &archmode, 1))
3805                         return -EFAULT;
3806                 gpa = px;
3807         } else
3808                 gpa -= __LC_FPREGS_SAVE_AREA;
3809
3810         /* manually convert vector registers if necessary */
3811         if (MACHINE_HAS_VX) {
3812                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3813                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3814                                      fprs, 128);
3815         } else {
3816                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3817                                      vcpu->run->s.regs.fprs, 128);
3818         }
3819         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3820                               vcpu->run->s.regs.gprs, 128);
3821         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3822                               &vcpu->arch.sie_block->gpsw, 16);
3823         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3824                               &px, 4);
3825         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3826                               &vcpu->run->s.regs.fpc, 4);
3827         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3828                               &vcpu->arch.sie_block->todpr, 4);
3829         cputm = kvm_s390_get_cpu_timer(vcpu);
3830         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3831                               &cputm, 8);
3832         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3833         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3834                               &clkcomp, 8);
3835         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3836                               &vcpu->run->s.regs.acrs, 64);
3837         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3838                               &vcpu->arch.sie_block->gcr, 128);
3839         return rc ? -EFAULT : 0;
3840 }
3841
3842 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3843 {
3844         /*
3845          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3846          * switch in the run ioctl. Let's update our copies before we save
3847          * them into the save area.
3848          */
3849         save_fpu_regs();
3850         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3851         save_access_regs(vcpu->run->s.regs.acrs);
3852
3853         return kvm_s390_store_status_unloaded(vcpu, addr);
3854 }
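
/*
 * A minimal userspace sketch (not part of this file): storing the status
 * of a stopped VCPU into absolute lowcore using the NOADDR special case
 * described above:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_NOADDR);
 */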
3855
3856 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3857 {
3858         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3859         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3860 }
3861
3862 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3863 {
3864         unsigned int i;
3865         struct kvm_vcpu *vcpu;
3866
3867         kvm_for_each_vcpu(i, vcpu, kvm) {
3868                 __disable_ibs_on_vcpu(vcpu);
3869         }
3870 }
3871
3872 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3873 {
3874         if (!sclp.has_ibs)
3875                 return;
3876         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3877         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3878 }
3879
3880 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3881 {
3882         int i, online_vcpus, started_vcpus = 0;
3883
3884         if (!is_vcpu_stopped(vcpu))
3885                 return;
3886
3887         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3888         /* Only one cpu at a time may enter/leave the STOPPED state. */
3889         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3890         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3891
3892         for (i = 0; i < online_vcpus; i++) {
3893                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3894                         started_vcpus++;
3895         }
3896
3897         if (started_vcpus == 0) {
3898                 /* we're the only active VCPU -> speed it up */
3899                 __enable_ibs_on_vcpu(vcpu);
3900         } else if (started_vcpus == 1) {
3901                 /*
3902                  * As we are starting a second VCPU, we have to disable
3903                  * the IBS facility on all VCPUs to remove potentially
3904                  * outstanding ENABLE requests.
3905                  */
3906                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3907         }
3908
3909         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3910         /*
3911          * Another VCPU might have used IBS while we were offline.
3912          * Let's play safe and flush the VCPU at startup.
3913          */
3914         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3915         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3917 }
3918
3919 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3920 {
3921         int i, online_vcpus, started_vcpus = 0;
3922         struct kvm_vcpu *started_vcpu = NULL;
3923
3924         if (is_vcpu_stopped(vcpu))
3925                 return;
3926
3927         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3928         /* Only one cpu at a time may enter/leave the STOPPED state. */
3929         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3930         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3931
3932         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3933         kvm_s390_clear_stop_irq(vcpu);
3934
3935         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3936         __disable_ibs_on_vcpu(vcpu);
3937
3938         for (i = 0; i < online_vcpus; i++) {
3939                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3940                         started_vcpus++;
3941                         started_vcpu = vcpu->kvm->vcpus[i];
3942                 }
3943         }
3944
3945         if (started_vcpus == 1) {
3946                 /*
3947                  * As we only have one VCPU left, we want to enable the
3948                  * IBS facility for that VCPU to speed it up.
3949                  */
3950                 __enable_ibs_on_vcpu(started_vcpu);
3951         }
3952
3953         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3955 }
3956
3957 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3958                                      struct kvm_enable_cap *cap)
3959 {
3960         int r;
3961
3962         if (cap->flags)
3963                 return -EINVAL;
3964
3965         switch (cap->cap) {
3966         case KVM_CAP_S390_CSS_SUPPORT:
3967                 if (!vcpu->kvm->arch.css_support) {
3968                         vcpu->kvm->arch.css_support = 1;
3969                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3970                         trace_kvm_s390_enable_css(vcpu->kvm);
3971                 }
3972                 r = 0;
3973                 break;
3974         default:
3975                 r = -EINVAL;
3976                 break;
3977         }
3978         return r;
3979 }
3980
3981 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3982                                   struct kvm_s390_mem_op *mop)
3983 {
3984         void __user *uaddr = (void __user *)mop->buf;
3985         void *tmpbuf = NULL;
3986         int r, srcu_idx;
3987         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3988                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3989
3990         if (mop->flags & ~supported_flags)
3991                 return -EINVAL;
3992
3993         if (mop->size > MEM_OP_MAX_SIZE)
3994                 return -E2BIG;
3995
3996         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3997                 tmpbuf = vmalloc(mop->size);
3998                 if (!tmpbuf)
3999                         return -ENOMEM;
4000         }
4001
4002         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4003
4004         switch (mop->op) {
4005         case KVM_S390_MEMOP_LOGICAL_READ:
4006                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4007                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4008                                             mop->size, GACC_FETCH);
4009                         break;
4010                 }
4011                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4012                 if (r == 0) {
4013                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4014                                 r = -EFAULT;
4015                 }
4016                 break;
4017         case KVM_S390_MEMOP_LOGICAL_WRITE:
4018                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4019                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4020                                             mop->size, GACC_STORE);
4021                         break;
4022                 }
4023                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4024                         r = -EFAULT;
4025                         break;
4026                 }
4027                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4028                 break;
4029         default:
4030                 r = -EINVAL;
4031         }
4032
4033         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4034
4035         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4036                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4037
4038         vfree(tmpbuf);
4039         return r;
4040 }
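
/*
 * A minimal userspace sketch (not part of this file): reading 256 bytes of
 * guest logical memory through the memop interface above, using access
 * register 0 and no flags; error checking is elided:
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */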
4041
4042 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4043                                unsigned int ioctl, unsigned long arg)
4044 {
4045         struct kvm_vcpu *vcpu = filp->private_data;
4046         void __user *argp = (void __user *)arg;
4047
4048         switch (ioctl) {
4049         case KVM_S390_IRQ: {
4050                 struct kvm_s390_irq s390irq;
4051
4052                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4053                         return -EFAULT;
4054                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4055         }
4056         case KVM_S390_INTERRUPT: {
4057                 struct kvm_s390_interrupt s390int;
4058                 struct kvm_s390_irq s390irq;
4059
4060                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4061                         return -EFAULT;
4062                 if (s390int_to_s390irq(&s390int, &s390irq))
4063                         return -EINVAL;
4064                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4065         }
4066         }
4067         return -ENOIOCTLCMD;
4068 }
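
/*
 * A minimal userspace sketch (not part of this file): injecting an
 * emergency signal through the asynchronous KVM_S390_IRQ path above;
 * the source CPU address 0 is illustrative only:
 *
 *	struct kvm_s390_irq irq = {
 *		.type = KVM_S390_INT_EMERGENCY,
 *		.u.emerg.code = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */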
4069
4070 long kvm_arch_vcpu_ioctl(struct file *filp,
4071                          unsigned int ioctl, unsigned long arg)
4072 {
4073         struct kvm_vcpu *vcpu = filp->private_data;
4074         void __user *argp = (void __user *)arg;
4075         int idx;
4076         long r;
4077
4078         vcpu_load(vcpu);
4079
4080         switch (ioctl) {
4081         case KVM_S390_STORE_STATUS:
4082                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4083                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4084                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4085                 break;
4086         case KVM_S390_SET_INITIAL_PSW: {
4087                 psw_t psw;
4088
4089                 r = -EFAULT;
4090                 if (copy_from_user(&psw, argp, sizeof(psw)))
4091                         break;
4092                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4093                 break;
4094         }
4095         case KVM_S390_INITIAL_RESET:
4096                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4097                 break;
4098         case KVM_SET_ONE_REG:
4099         case KVM_GET_ONE_REG: {
4100                 struct kvm_one_reg reg;
4101                 r = -EFAULT;
4102                 if (copy_from_user(&reg, argp, sizeof(reg)))
4103                         break;
4104                 if (ioctl == KVM_SET_ONE_REG)
4105                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4106                 else
4107                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4108                 break;
4109         }
4110 #ifdef CONFIG_KVM_S390_UCONTROL
4111         case KVM_S390_UCAS_MAP: {
4112                 struct kvm_s390_ucas_mapping ucasmap;
4113
4114                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4115                         r = -EFAULT;
4116                         break;
4117                 }
4118
4119                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4120                         r = -EINVAL;
4121                         break;
4122                 }
4123
4124                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4125                                      ucasmap.vcpu_addr, ucasmap.length);
4126                 break;
4127         }
4128         case KVM_S390_UCAS_UNMAP: {
4129                 struct kvm_s390_ucas_mapping ucasmap;
4130
4131                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4132                         r = -EFAULT;
4133                         break;
4134                 }
4135
4136                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4137                         r = -EINVAL;
4138                         break;
4139                 }
4140
4141                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4142                         ucasmap.length);
4143                 break;
4144         }
4145 #endif
4146         case KVM_S390_VCPU_FAULT: {
4147                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4148                 break;
4149         }
4150         case KVM_ENABLE_CAP:
4151         {
4152                 struct kvm_enable_cap cap;
4153                 r = -EFAULT;
4154                 if (copy_from_user(&cap, argp, sizeof(cap)))
4155                         break;
4156                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4157                 break;
4158         }
4159         case KVM_S390_MEM_OP: {
4160                 struct kvm_s390_mem_op mem_op;
4161
4162                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4163                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4164                 else
4165                         r = -EFAULT;
4166                 break;
4167         }
4168         case KVM_S390_SET_IRQ_STATE: {
4169                 struct kvm_s390_irq_state irq_state;
4170
4171                 r = -EFAULT;
4172                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4173                         break;
4174                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4175                     irq_state.len == 0 ||
4176                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4177                         r = -EINVAL;
4178                         break;
4179                 }
4180                 /* do not use irq_state.flags, it will break old QEMUs */
4181                 r = kvm_s390_set_irq_state(vcpu,
4182                                            (void __user *) irq_state.buf,
4183                                            irq_state.len);
4184                 break;
4185         }
4186         case KVM_S390_GET_IRQ_STATE: {
4187                 struct kvm_s390_irq_state irq_state;
4188
4189                 r = -EFAULT;
4190                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4191                         break;
4192                 if (irq_state.len == 0) {
4193                         r = -EINVAL;
4194                         break;
4195                 }
4196                 /* do not use irq_state.flags, it will break old QEMUs */
4197                 r = kvm_s390_get_irq_state(vcpu,
4198                                            (__u8 __user *)  irq_state.buf,
4199                                            irq_state.len);
4200                 break;
4201         }
4202         default:
4203                 r = -ENOTTY;
4204         }
4205
4206         vcpu_put(vcpu);
4207         return r;
4208 }
4209
4210 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4211 {
4212 #ifdef CONFIG_KVM_S390_UCONTROL
4213         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4214                  && (kvm_is_ucontrol(vcpu->kvm))) {
4215                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4216                 get_page(vmf->page);
4217                 return 0;
4218         }
4219 #endif
4220         return VM_FAULT_SIGBUS;
4221 }
4222
4223 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4224                             unsigned long npages)
4225 {
4226         return 0;
4227 }
4228
4229 /* Section: memory related */
4230 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4231                                    struct kvm_memory_slot *memslot,
4232                                    const struct kvm_userspace_memory_region *mem,
4233                                    enum kvm_mr_change change)
4234 {
4235         /* A few sanity checks. Memory slots have to start and end at a
4236            segment boundary (1MB). The memory in userland may be fragmented
4237            into various different vmas. It is okay to mmap() and munmap()
4238            memory in this slot at any time after doing this call. */
4239
4240         if (mem->userspace_addr & 0xffffful)
4241                 return -EINVAL;
4242
4243         if (mem->memory_size & 0xffffful)
4244                 return -EINVAL;
4245
4246         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4247                 return -EINVAL;
4248
4249         return 0;
4250 }
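
/*
 * A minimal userspace sketch (not part of this file): a memslot layout
 * that passes the alignment checks above; "mem_size" and "host_mem" are
 * assumed to be megabyte-aligned:
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = mem_size,
 *		.userspace_addr  = (__u64)host_mem,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 */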
4251
4252 void kvm_arch_commit_memory_region(struct kvm *kvm,
4253                                 const struct kvm_userspace_memory_region *mem,
4254                                 const struct kvm_memory_slot *old,
4255                                 const struct kvm_memory_slot *new,
4256                                 enum kvm_mr_change change)
4257 {
4258         int rc;
4259
4260         /* If the basics of the memslot do not change, we do not want
4261          * to update the gmap. Every update causes several unnecessary
4262          * segment translation exceptions. This is usually handled just
4263          * fine by the normal fault handler + gmap, but it will also
4264          * cause faults on the prefix page of running guest CPUs.
4265          */
4266         if (old->userspace_addr == mem->userspace_addr &&
4267             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4268             old->npages * PAGE_SIZE == mem->memory_size)
4269                 return;
4270
4271         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4272                 mem->guest_phys_addr, mem->memory_size);
4273         if (rc)
4274                 pr_warn("failed to commit memory region\n");
4275         return;
4276 }
4277
4278 static inline unsigned long nonhyp_mask(int i)
4279 {
4280         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4281
4282         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4283 }
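
/*
 * Worked example for nonhyp_mask(): sclp.hmfai is read as four 2-bit
 * fields, selected by i, and every increment of a field strips another
 * 16 facility bits from the 48-bit window:
 *
 *	nonhyp_fai = 0 -> mask = 0x0000ffffffffffff
 *	nonhyp_fai = 1 -> mask = 0x00000000ffffffff
 *	nonhyp_fai = 2 -> mask = 0x000000000000ffff
 *	nonhyp_fai = 3 -> mask = 0x0000000000000000
 */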
4284
4285 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4286 {
4287         vcpu->valid_wakeup = false;
4288 }
4289
4290 static int __init kvm_s390_init(void)
4291 {
4292         int i;
4293
4294         if (!sclp.has_sief2) {
4295                 pr_info("SIE not available\n");
4296                 return -ENODEV;
4297         }
4298
4299         if (nested && hpage) {
4300                 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
4301                 return -EINVAL;
4302         }
4303
4304         for (i = 0; i < 16; i++)
4305                 kvm_s390_fac_base[i] |=
4306                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4307
4308         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4309 }
4310
4311 static void __exit kvm_s390_exit(void)
4312 {
4313         kvm_exit();
4314 }
4315
4316 module_init(kvm_s390_init);
4317 module_exit(kvm_s390_exit);
4318
4319 /*
4320  * Enable autoloading of the kvm module.
4321  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4322  * since x86 takes a different approach.
4323  */
4324 #include <linux/miscdevice.h>
4325 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4326 MODULE_ALIAS("devname:kvm");