KVM: s390: Add memcg accounting to KVM allocations
[linux-2.6-microblaze.git] / arch / s390 / kvm / kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
/*
 * Component name for this file; pr_fmt() prepends it (followed by ": ")
 * to every format string that is passed through it.
 */
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54
/* Maximum transfer size for KVM_S390_MEM_OP */
#define MEM_OP_MAX_SIZE 65536
#define LOCAL_IRQS 32
/* Size in bytes of a buffer holding KVM_MAX_VCPUS + LOCAL_IRQS irq structs */
#define VCPU_IRQS_MAX_BUF \
	(sizeof(struct kvm_s390_irq) * (KVM_MAX_VCPUS + LOCAL_IRQS))
59
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61         VCPU_STAT("userspace_handled", exit_userspace),
62         VCPU_STAT("exit_null", exit_null),
63         VCPU_STAT("exit_validity", exit_validity),
64         VCPU_STAT("exit_stop_request", exit_stop_request),
65         VCPU_STAT("exit_external_request", exit_external_request),
66         VCPU_STAT("exit_io_request", exit_io_request),
67         VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
68         VCPU_STAT("exit_instruction", exit_instruction),
69         VCPU_STAT("exit_pei", exit_pei),
70         VCPU_STAT("exit_program_interruption", exit_program_interruption),
71         VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
72         VCPU_STAT("exit_operation_exception", exit_operation_exception),
73         VCPU_STAT("halt_successful_poll", halt_successful_poll),
74         VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
75         VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
76         VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
77         VCPU_STAT("halt_wakeup", halt_wakeup),
78         VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
79         VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
80         VCPU_STAT("instruction_lctlg", instruction_lctlg),
81         VCPU_STAT("instruction_lctl", instruction_lctl),
82         VCPU_STAT("instruction_stctl", instruction_stctl),
83         VCPU_STAT("instruction_stctg", instruction_stctg),
84         VCPU_STAT("deliver_ckc", deliver_ckc),
85         VCPU_STAT("deliver_cputm", deliver_cputm),
86         VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
87         VCPU_STAT("deliver_external_call", deliver_external_call),
88         VCPU_STAT("deliver_service_signal", deliver_service_signal),
89         VCPU_STAT("deliver_virtio", deliver_virtio),
90         VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
91         VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
92         VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
93         VCPU_STAT("deliver_program", deliver_program),
94         VCPU_STAT("deliver_io", deliver_io),
95         VCPU_STAT("deliver_machine_check", deliver_machine_check),
96         VCPU_STAT("exit_wait_state", exit_wait_state),
97         VCPU_STAT("inject_ckc", inject_ckc),
98         VCPU_STAT("inject_cputm", inject_cputm),
99         VCPU_STAT("inject_external_call", inject_external_call),
100         VM_STAT("inject_float_mchk", inject_float_mchk),
101         VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
102         VM_STAT("inject_io", inject_io),
103         VCPU_STAT("inject_mchk", inject_mchk),
104         VM_STAT("inject_pfault_done", inject_pfault_done),
105         VCPU_STAT("inject_program", inject_program),
106         VCPU_STAT("inject_restart", inject_restart),
107         VM_STAT("inject_service_signal", inject_service_signal),
108         VCPU_STAT("inject_set_prefix", inject_set_prefix),
109         VCPU_STAT("inject_stop_signal", inject_stop_signal),
110         VCPU_STAT("inject_pfault_init", inject_pfault_init),
111         VM_STAT("inject_virtio", inject_virtio),
112         VCPU_STAT("instruction_epsw", instruction_epsw),
113         VCPU_STAT("instruction_gs", instruction_gs),
114         VCPU_STAT("instruction_io_other", instruction_io_other),
115         VCPU_STAT("instruction_lpsw", instruction_lpsw),
116         VCPU_STAT("instruction_lpswe", instruction_lpswe),
117         VCPU_STAT("instruction_pfmf", instruction_pfmf),
118         VCPU_STAT("instruction_ptff", instruction_ptff),
119         VCPU_STAT("instruction_stidp", instruction_stidp),
120         VCPU_STAT("instruction_sck", instruction_sck),
121         VCPU_STAT("instruction_sckpf", instruction_sckpf),
122         VCPU_STAT("instruction_spx", instruction_spx),
123         VCPU_STAT("instruction_stpx", instruction_stpx),
124         VCPU_STAT("instruction_stap", instruction_stap),
125         VCPU_STAT("instruction_iske", instruction_iske),
126         VCPU_STAT("instruction_ri", instruction_ri),
127         VCPU_STAT("instruction_rrbe", instruction_rrbe),
128         VCPU_STAT("instruction_sske", instruction_sske),
129         VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
130         VCPU_STAT("instruction_essa", instruction_essa),
131         VCPU_STAT("instruction_stsi", instruction_stsi),
132         VCPU_STAT("instruction_stfl", instruction_stfl),
133         VCPU_STAT("instruction_tb", instruction_tb),
134         VCPU_STAT("instruction_tpi", instruction_tpi),
135         VCPU_STAT("instruction_tprot", instruction_tprot),
136         VCPU_STAT("instruction_tsch", instruction_tsch),
137         VCPU_STAT("instruction_sthyi", instruction_sthyi),
138         VCPU_STAT("instruction_sie", instruction_sie),
139         VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
140         VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
141         VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
142         VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
143         VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
144         VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
145         VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
146         VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
147         VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
148         VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
149         VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
150         VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
151         VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
152         VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
153         VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
154         VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
155         VCPU_STAT("instruction_diag_10", diagnose_10),
156         VCPU_STAT("instruction_diag_44", diagnose_44),
157         VCPU_STAT("instruction_diag_9c", diagnose_9c),
158         VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
159         VCPU_STAT("instruction_diag_258", diagnose_258),
160         VCPU_STAT("instruction_diag_308", diagnose_308),
161         VCPU_STAT("instruction_diag_500", diagnose_500),
162         VCPU_STAT("instruction_diag_other", diagnose_other),
163         { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167         __u8 epoch_idx;
168         __u64 tod;
169         __u8 reserved[7];
170 } __packed;
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
186
187 /* if set to true, the GISA will be initialized and used if available */
188 static bool use_gisa  = true;
189 module_param(use_gisa, bool, 0644);
190 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
191
192 /*
193  * For now we handle at most 16 double words as this is what the s390 base
194  * kernel handles and stores in the prefix page. If we ever need to go beyond
195  * this, this requires changes to code, but the external uapi can stay.
196  */
197 #define SIZE_INTERNAL 16
198
199 /*
200  * Base feature mask that defines default mask for facilities. Consists of the
201  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
202  */
203 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
204 /*
205  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
206  * and defines the facilities that can be enabled via a cpu model.
207  */
208 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
209
210 static unsigned long kvm_s390_fac_size(void)
211 {
212         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
213         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
214         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
215                 sizeof(S390_lowcore.stfle_fac_list));
216
217         return SIZE_INTERNAL;
218 }
219
220 /* available cpu features supported by kvm */
221 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
222 /* available subfunctions indicated via query / "test bit" */
223 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
224
225 static struct gmap_notifier gmap_notifier;
226 static struct gmap_notifier vsie_gmap_notifier;
227 debug_info_t *kvm_s390_dbf;
228 debug_info_t *kvm_s390_dbf_uv;
229
/* Section: not file related */

/*
 * Nothing has to be switched on: every s390 is virtualization enabled.
 * Always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
236
/*
 * Processor compatibility check. @opaque is not looked at and the check
 * always succeeds (returns 0).
 */
int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
241
/* Prototypes of functions that are used before they are defined */
static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
246
247 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
248 {
249         u8 delta_idx = 0;
250
251         /*
252          * The TOD jumps by delta, we have to compensate this by adding
253          * -delta to the epoch.
254          */
255         delta = -delta;
256
257         /* sign-extension - we're adding to signed values below */
258         if ((s64)delta < 0)
259                 delta_idx = -1;
260
261         scb->epoch += delta;
262         if (scb->ecd & ECD_MEF) {
263                 scb->epdx += delta_idx;
264                 if (scb->epoch < delta)
265                         scb->epdx += 1;
266         }
267 }
268
269 /*
270  * This callback is executed during stop_machine(). All CPUs are therefore
271  * temporarily stopped. In order not to change guest behavior, we have to
272  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
273  * so a CPU won't be stopped while calculating with the epoch.
274  */
275 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
276                           void *v)
277 {
278         struct kvm *kvm;
279         struct kvm_vcpu *vcpu;
280         int i;
281         unsigned long long *delta = v;
282
283         list_for_each_entry(kvm, &vm_list, vm_list) {
284                 kvm_for_each_vcpu(i, vcpu, kvm) {
285                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
286                         if (i == 0) {
287                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
288                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
289                         }
290                         if (vcpu->arch.cputm_enabled)
291                                 vcpu->arch.cputm_start += *delta;
292                         if (vcpu->arch.vsie_block)
293                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
294                                                    *delta);
295                 }
296         }
297         return NOTIFY_OK;
298 }
299
300 static struct notifier_block kvm_clock_notifier = {
301         .notifier_call = kvm_clock_sync,
302 };
303
304 int kvm_arch_hardware_setup(void *opaque)
305 {
306         gmap_notifier.notifier_call = kvm_gmap_notifier;
307         gmap_register_pte_notifier(&gmap_notifier);
308         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
309         gmap_register_pte_notifier(&vsie_gmap_notifier);
310         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
311                                        &kvm_clock_notifier);
312         return 0;
313 }
314
315 void kvm_arch_hardware_unsetup(void)
316 {
317         gmap_unregister_pte_notifier(&gmap_notifier);
318         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
319         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
320                                          &kvm_clock_notifier);
321 }
322
323 static void allow_cpu_feat(unsigned long nr)
324 {
325         set_bit_inv(nr, kvm_s390_available_cpu_feat);
326 }
327
328 static inline int plo_test_bit(unsigned char nr)
329 {
330         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
331         int cc;
332
333         asm volatile(
334                 /* Parameter registers are ignored for "test bit" */
335                 "       plo     0,0,0,0(0)\n"
336                 "       ipm     %0\n"
337                 "       srl     %0,28\n"
338                 : "=d" (cc)
339                 : "d" (r0)
340                 : "cc");
341         return cc == 0;
342 }
343
344 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
345 {
346         register unsigned long r0 asm("0") = 0; /* query function */
347         register unsigned long r1 asm("1") = (unsigned long) query;
348
349         asm volatile(
350                 /* Parameter regs are ignored */
351                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
352                 :
353                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
354                 : "cc", "memory");
355 }
356
/* Opcodes handed to __insn32_query() for the SORTL and DFLTCC queries */
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
359
360 static void kvm_s390_cpu_feat_init(void)
361 {
362         int i;
363
364         for (i = 0; i < 256; ++i) {
365                 if (plo_test_bit(i))
366                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
367         }
368
369         if (test_facility(28)) /* TOD-clock steering */
370                 ptff(kvm_s390_available_subfunc.ptff,
371                      sizeof(kvm_s390_available_subfunc.ptff),
372                      PTFF_QAF);
373
374         if (test_facility(17)) { /* MSA */
375                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
376                               kvm_s390_available_subfunc.kmac);
377                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
378                               kvm_s390_available_subfunc.kmc);
379                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
380                               kvm_s390_available_subfunc.km);
381                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
382                               kvm_s390_available_subfunc.kimd);
383                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
384                               kvm_s390_available_subfunc.klmd);
385         }
386         if (test_facility(76)) /* MSA3 */
387                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
388                               kvm_s390_available_subfunc.pckmo);
389         if (test_facility(77)) { /* MSA4 */
390                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
391                               kvm_s390_available_subfunc.kmctr);
392                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
393                               kvm_s390_available_subfunc.kmf);
394                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
395                               kvm_s390_available_subfunc.kmo);
396                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
397                               kvm_s390_available_subfunc.pcc);
398         }
399         if (test_facility(57)) /* MSA5 */
400                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
401                               kvm_s390_available_subfunc.ppno);
402
403         if (test_facility(146)) /* MSA8 */
404                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
405                               kvm_s390_available_subfunc.kma);
406
407         if (test_facility(155)) /* MSA9 */
408                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
409                               kvm_s390_available_subfunc.kdsa);
410
411         if (test_facility(150)) /* SORTL */
412                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
413
414         if (test_facility(151)) /* DFLTCC */
415                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
416
417         if (MACHINE_HAS_ESOP)
418                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
419         /*
420          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
421          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
422          */
423         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
424             !test_facility(3) || !nested)
425                 return;
426         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
427         if (sclp.has_64bscao)
428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
429         if (sclp.has_siif)
430                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
431         if (sclp.has_gpere)
432                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
433         if (sclp.has_gsls)
434                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
435         if (sclp.has_ib)
436                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
437         if (sclp.has_cei)
438                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
439         if (sclp.has_ibs)
440                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
441         if (sclp.has_kss)
442                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
443         /*
444          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
445          * all skey handling functions read/set the skey from the PGSTE
446          * instead of the real storage key.
447          *
448          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
449          * pages being detected as preserved although they are resident.
450          *
451          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
452          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
453          *
454          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
455          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
456          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
457          *
458          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
459          * cannot easily shadow the SCA because of the ipte lock.
460          */
461 }
462
463 int kvm_arch_init(void *opaque)
464 {
465         int rc = -ENOMEM;
466
467         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
468         if (!kvm_s390_dbf)
469                 return -ENOMEM;
470
471         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
472         if (!kvm_s390_dbf_uv)
473                 goto out;
474
475         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
476             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
477                 goto out;
478
479         kvm_s390_cpu_feat_init();
480
481         /* Register floating interrupt controller interface. */
482         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
483         if (rc) {
484                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
485                 goto out;
486         }
487
488         rc = kvm_s390_gib_init(GAL_ISC);
489         if (rc)
490                 goto out;
491
492         return 0;
493
494 out:
495         kvm_arch_exit();
496         return rc;
497 }
498
499 void kvm_arch_exit(void)
500 {
501         kvm_s390_gib_destroy();
502         debug_unregister(kvm_s390_dbf);
503         debug_unregister(kvm_s390_dbf_uv);
504 }
505
506 /* Section: device related */
507 long kvm_arch_dev_ioctl(struct file *filp,
508                         unsigned int ioctl, unsigned long arg)
509 {
510         if (ioctl == KVM_S390_ENABLE_SIE)
511                 return s390_enable_sie();
512         return -EINVAL;
513 }
514
515 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
516 {
517         int r;
518
519         switch (ext) {
520         case KVM_CAP_S390_PSW:
521         case KVM_CAP_S390_GMAP:
522         case KVM_CAP_SYNC_MMU:
523 #ifdef CONFIG_KVM_S390_UCONTROL
524         case KVM_CAP_S390_UCONTROL:
525 #endif
526         case KVM_CAP_ASYNC_PF:
527         case KVM_CAP_SYNC_REGS:
528         case KVM_CAP_ONE_REG:
529         case KVM_CAP_ENABLE_CAP:
530         case KVM_CAP_S390_CSS_SUPPORT:
531         case KVM_CAP_IOEVENTFD:
532         case KVM_CAP_DEVICE_CTRL:
533         case KVM_CAP_S390_IRQCHIP:
534         case KVM_CAP_VM_ATTRIBUTES:
535         case KVM_CAP_MP_STATE:
536         case KVM_CAP_IMMEDIATE_EXIT:
537         case KVM_CAP_S390_INJECT_IRQ:
538         case KVM_CAP_S390_USER_SIGP:
539         case KVM_CAP_S390_USER_STSI:
540         case KVM_CAP_S390_SKEYS:
541         case KVM_CAP_S390_IRQ_STATE:
542         case KVM_CAP_S390_USER_INSTR0:
543         case KVM_CAP_S390_CMMA_MIGRATION:
544         case KVM_CAP_S390_AIS:
545         case KVM_CAP_S390_AIS_MIGRATION:
546         case KVM_CAP_S390_VCPU_RESETS:
547         case KVM_CAP_SET_GUEST_DEBUG:
548         case KVM_CAP_S390_DIAG318:
549                 r = 1;
550                 break;
551         case KVM_CAP_S390_HPAGE_1M:
552                 r = 0;
553                 if (hpage && !kvm_is_ucontrol(kvm))
554                         r = 1;
555                 break;
556         case KVM_CAP_S390_MEM_OP:
557                 r = MEM_OP_MAX_SIZE;
558                 break;
559         case KVM_CAP_NR_VCPUS:
560         case KVM_CAP_MAX_VCPUS:
561         case KVM_CAP_MAX_VCPU_ID:
562                 r = KVM_S390_BSCA_CPU_SLOTS;
563                 if (!kvm_s390_use_sca_entries())
564                         r = KVM_MAX_VCPUS;
565                 else if (sclp.has_esca && sclp.has_64bscao)
566                         r = KVM_S390_ESCA_CPU_SLOTS;
567                 break;
568         case KVM_CAP_S390_COW:
569                 r = MACHINE_HAS_ESOP;
570                 break;
571         case KVM_CAP_S390_VECTOR_REGISTERS:
572                 r = MACHINE_HAS_VX;
573                 break;
574         case KVM_CAP_S390_RI:
575                 r = test_facility(64);
576                 break;
577         case KVM_CAP_S390_GS:
578                 r = test_facility(133);
579                 break;
580         case KVM_CAP_S390_BPB:
581                 r = test_facility(82);
582                 break;
583         case KVM_CAP_S390_PROTECTED:
584                 r = is_prot_virt_host();
585                 break;
586         default:
587                 r = 0;
588         }
589         return r;
590 }
591
592 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593 {
594         int i;
595         gfn_t cur_gfn, last_gfn;
596         unsigned long gaddr, vmaddr;
597         struct gmap *gmap = kvm->arch.gmap;
598         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
599
600         /* Loop over all guest segments */
601         cur_gfn = memslot->base_gfn;
602         last_gfn = memslot->base_gfn + memslot->npages;
603         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604                 gaddr = gfn_to_gpa(cur_gfn);
605                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606                 if (kvm_is_error_hva(vmaddr))
607                         continue;
608
609                 bitmap_zero(bitmap, _PAGE_ENTRIES);
610                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
611                 for (i = 0; i < _PAGE_ENTRIES; i++) {
612                         if (test_bit(i, bitmap))
613                                 mark_page_dirty(kvm, cur_gfn + i);
614                 }
615
616                 if (fatal_signal_pending(current))
617                         return;
618                 cond_resched();
619         }
620 }
621
/* Section: vm related */

/* Used before its definition */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624
625 /*
626  * Get (and clear) the dirty memory log for a memory slot.
627  */
628 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629                                struct kvm_dirty_log *log)
630 {
631         int r;
632         unsigned long n;
633         struct kvm_memory_slot *memslot;
634         int is_dirty;
635
636         if (kvm_is_ucontrol(kvm))
637                 return -EINVAL;
638
639         mutex_lock(&kvm->slots_lock);
640
641         r = -EINVAL;
642         if (log->slot >= KVM_USER_MEM_SLOTS)
643                 goto out;
644
645         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646         if (r)
647                 goto out;
648
649         /* Clear the dirty log */
650         if (is_dirty) {
651                 n = kvm_dirty_bitmap_bytes(memslot);
652                 memset(memslot->dirty_bitmap, 0, n);
653         }
654         r = 0;
655 out:
656         mutex_unlock(&kvm->slots_lock);
657         return r;
658 }
659
660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661 {
662         unsigned int i;
663         struct kvm_vcpu *vcpu;
664
665         kvm_for_each_vcpu(i, vcpu, kvm) {
666                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667         }
668 }
669
670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671 {
672         int r;
673
674         if (cap->flags)
675                 return -EINVAL;
676
677         switch (cap->cap) {
678         case KVM_CAP_S390_IRQCHIP:
679                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680                 kvm->arch.use_irqchip = 1;
681                 r = 0;
682                 break;
683         case KVM_CAP_S390_USER_SIGP:
684                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685                 kvm->arch.user_sigp = 1;
686                 r = 0;
687                 break;
688         case KVM_CAP_S390_VECTOR_REGISTERS:
689                 mutex_lock(&kvm->lock);
690                 if (kvm->created_vcpus) {
691                         r = -EBUSY;
692                 } else if (MACHINE_HAS_VX) {
693                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
694                         set_kvm_facility(kvm->arch.model.fac_list, 129);
695                         if (test_facility(134)) {
696                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
697                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
698                         }
699                         if (test_facility(135)) {
700                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
701                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
702                         }
703                         if (test_facility(148)) {
704                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
705                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
706                         }
707                         if (test_facility(152)) {
708                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
709                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
710                         }
711                         r = 0;
712                 } else
713                         r = -EINVAL;
714                 mutex_unlock(&kvm->lock);
715                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716                          r ? "(not available)" : "(success)");
717                 break;
718         case KVM_CAP_S390_RI:
719                 r = -EINVAL;
720                 mutex_lock(&kvm->lock);
721                 if (kvm->created_vcpus) {
722                         r = -EBUSY;
723                 } else if (test_facility(64)) {
724                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
725                         set_kvm_facility(kvm->arch.model.fac_list, 64);
726                         r = 0;
727                 }
728                 mutex_unlock(&kvm->lock);
729                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730                          r ? "(not available)" : "(success)");
731                 break;
732         case KVM_CAP_S390_AIS:
733                 mutex_lock(&kvm->lock);
734                 if (kvm->created_vcpus) {
735                         r = -EBUSY;
736                 } else {
737                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
738                         set_kvm_facility(kvm->arch.model.fac_list, 72);
739                         r = 0;
740                 }
741                 mutex_unlock(&kvm->lock);
742                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743                          r ? "(not available)" : "(success)");
744                 break;
745         case KVM_CAP_S390_GS:
746                 r = -EINVAL;
747                 mutex_lock(&kvm->lock);
748                 if (kvm->created_vcpus) {
749                         r = -EBUSY;
750                 } else if (test_facility(133)) {
751                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
752                         set_kvm_facility(kvm->arch.model.fac_list, 133);
753                         r = 0;
754                 }
755                 mutex_unlock(&kvm->lock);
756                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757                          r ? "(not available)" : "(success)");
758                 break;
759         case KVM_CAP_S390_HPAGE_1M:
760                 mutex_lock(&kvm->lock);
761                 if (kvm->created_vcpus)
762                         r = -EBUSY;
763                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764                         r = -EINVAL;
765                 else {
766                         r = 0;
767                         mmap_write_lock(kvm->mm);
768                         kvm->mm->context.allow_gmap_hpage_1m = 1;
769                         mmap_write_unlock(kvm->mm);
770                         /*
771                          * We might have to create fake 4k page
772                          * tables. To avoid that the hardware works on
773                          * stale PGSTEs, we emulate these instructions.
774                          */
775                         kvm->arch.use_skf = 0;
776                         kvm->arch.use_pfmfi = 0;
777                 }
778                 mutex_unlock(&kvm->lock);
779                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780                          r ? "(not available)" : "(success)");
781                 break;
782         case KVM_CAP_S390_USER_STSI:
783                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784                 kvm->arch.user_stsi = 1;
785                 r = 0;
786                 break;
787         case KVM_CAP_S390_USER_INSTR0:
788                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789                 kvm->arch.user_instr0 = 1;
790                 icpt_operexc_on_all_vcpus(kvm);
791                 r = 0;
792                 break;
793         default:
794                 r = -EINVAL;
795                 break;
796         }
797         return r;
798 }
799
800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802         int ret;
803
804         switch (attr->attr) {
805         case KVM_S390_VM_MEM_LIMIT_SIZE:
806                 ret = 0;
807                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808                          kvm->arch.mem_limit);
809                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810                         ret = -EFAULT;
811                 break;
812         default:
813                 ret = -ENXIO;
814                 break;
815         }
816         return ret;
817 }
818
819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820 {
821         int ret;
822         unsigned int idx;
823         switch (attr->attr) {
824         case KVM_S390_VM_MEM_ENABLE_CMMA:
825                 ret = -ENXIO;
826                 if (!sclp.has_cmma)
827                         break;
828
829                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830                 mutex_lock(&kvm->lock);
831                 if (kvm->created_vcpus)
832                         ret = -EBUSY;
833                 else if (kvm->mm->context.allow_gmap_hpage_1m)
834                         ret = -EINVAL;
835                 else {
836                         kvm->arch.use_cmma = 1;
837                         /* Not compatible with cmma. */
838                         kvm->arch.use_pfmfi = 0;
839                         ret = 0;
840                 }
841                 mutex_unlock(&kvm->lock);
842                 break;
843         case KVM_S390_VM_MEM_CLR_CMMA:
844                 ret = -ENXIO;
845                 if (!sclp.has_cmma)
846                         break;
847                 ret = -EINVAL;
848                 if (!kvm->arch.use_cmma)
849                         break;
850
851                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852                 mutex_lock(&kvm->lock);
853                 idx = srcu_read_lock(&kvm->srcu);
854                 s390_reset_cmma(kvm->arch.gmap->mm);
855                 srcu_read_unlock(&kvm->srcu, idx);
856                 mutex_unlock(&kvm->lock);
857                 ret = 0;
858                 break;
859         case KVM_S390_VM_MEM_LIMIT_SIZE: {
860                 unsigned long new_limit;
861
862                 if (kvm_is_ucontrol(kvm))
863                         return -EINVAL;
864
865                 if (get_user(new_limit, (u64 __user *)attr->addr))
866                         return -EFAULT;
867
868                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869                     new_limit > kvm->arch.mem_limit)
870                         return -E2BIG;
871
872                 if (!new_limit)
873                         return -EINVAL;
874
875                 /* gmap_create takes last usable address */
876                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
877                         new_limit -= 1;
878
879                 ret = -EBUSY;
880                 mutex_lock(&kvm->lock);
881                 if (!kvm->created_vcpus) {
882                         /* gmap_create will round the limit up */
883                         struct gmap *new = gmap_create(current->mm, new_limit);
884
885                         if (!new) {
886                                 ret = -ENOMEM;
887                         } else {
888                                 gmap_remove(kvm->arch.gmap);
889                                 new->private = kvm;
890                                 kvm->arch.gmap = new;
891                                 ret = 0;
892                         }
893                 }
894                 mutex_unlock(&kvm->lock);
895                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897                          (void *) kvm->arch.gmap->asce);
898                 break;
899         }
900         default:
901                 ret = -ENXIO;
902                 break;
903         }
904         return ret;
905 }
906
907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908
909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910 {
911         struct kvm_vcpu *vcpu;
912         int i;
913
914         kvm_s390_vcpu_block_all(kvm);
915
916         kvm_for_each_vcpu(i, vcpu, kvm) {
917                 kvm_s390_vcpu_crypto_setup(vcpu);
918                 /* recreate the shadow crycb by leaving the VSIE handler */
919                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920         }
921
922         kvm_s390_vcpu_unblock_all(kvm);
923 }
924
925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927         mutex_lock(&kvm->lock);
928         switch (attr->attr) {
929         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930                 if (!test_kvm_facility(kvm, 76)) {
931                         mutex_unlock(&kvm->lock);
932                         return -EINVAL;
933                 }
934                 get_random_bytes(
935                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937                 kvm->arch.crypto.aes_kw = 1;
938                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939                 break;
940         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941                 if (!test_kvm_facility(kvm, 76)) {
942                         mutex_unlock(&kvm->lock);
943                         return -EINVAL;
944                 }
945                 get_random_bytes(
946                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948                 kvm->arch.crypto.dea_kw = 1;
949                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950                 break;
951         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952                 if (!test_kvm_facility(kvm, 76)) {
953                         mutex_unlock(&kvm->lock);
954                         return -EINVAL;
955                 }
956                 kvm->arch.crypto.aes_kw = 0;
957                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960                 break;
961         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962                 if (!test_kvm_facility(kvm, 76)) {
963                         mutex_unlock(&kvm->lock);
964                         return -EINVAL;
965                 }
966                 kvm->arch.crypto.dea_kw = 0;
967                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970                 break;
971         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972                 if (!ap_instructions_available()) {
973                         mutex_unlock(&kvm->lock);
974                         return -EOPNOTSUPP;
975                 }
976                 kvm->arch.crypto.apie = 1;
977                 break;
978         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979                 if (!ap_instructions_available()) {
980                         mutex_unlock(&kvm->lock);
981                         return -EOPNOTSUPP;
982                 }
983                 kvm->arch.crypto.apie = 0;
984                 break;
985         default:
986                 mutex_unlock(&kvm->lock);
987                 return -ENXIO;
988         }
989
990         kvm_s390_vcpu_crypto_reset_all(kvm);
991         mutex_unlock(&kvm->lock);
992         return 0;
993 }
994
995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996 {
997         int cx;
998         struct kvm_vcpu *vcpu;
999
1000         kvm_for_each_vcpu(cx, vcpu, kvm)
1001                 kvm_s390_sync_request(req, vcpu);
1002 }
1003
1004 /*
1005  * Must be called with kvm->srcu held to avoid races on memslots, and with
1006  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007  */
1008 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009 {
1010         struct kvm_memory_slot *ms;
1011         struct kvm_memslots *slots;
1012         unsigned long ram_pages = 0;
1013         int slotnr;
1014
1015         /* migration mode already enabled */
1016         if (kvm->arch.migration_mode)
1017                 return 0;
1018         slots = kvm_memslots(kvm);
1019         if (!slots || !slots->used_slots)
1020                 return -EINVAL;
1021
1022         if (!kvm->arch.use_cmma) {
1023                 kvm->arch.migration_mode = 1;
1024                 return 0;
1025         }
1026         /* mark all the pages in active slots as dirty */
1027         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028                 ms = slots->memslots + slotnr;
1029                 if (!ms->dirty_bitmap)
1030                         return -EINVAL;
1031                 /*
1032                  * The second half of the bitmap is only used on x86,
1033                  * and would be wasted otherwise, so we put it to good
1034                  * use here to keep track of the state of the storage
1035                  * attributes.
1036                  */
1037                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038                 ram_pages += ms->npages;
1039         }
1040         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041         kvm->arch.migration_mode = 1;
1042         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043         return 0;
1044 }
1045
1046 /*
1047  * Must be called with kvm->slots_lock to avoid races with ourselves and
1048  * kvm_s390_vm_start_migration.
1049  */
1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051 {
1052         /* migration mode already disabled */
1053         if (!kvm->arch.migration_mode)
1054                 return 0;
1055         kvm->arch.migration_mode = 0;
1056         if (kvm->arch.use_cmma)
1057                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058         return 0;
1059 }
1060
1061 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062                                      struct kvm_device_attr *attr)
1063 {
1064         int res = -ENXIO;
1065
1066         mutex_lock(&kvm->slots_lock);
1067         switch (attr->attr) {
1068         case KVM_S390_VM_MIGRATION_START:
1069                 res = kvm_s390_vm_start_migration(kvm);
1070                 break;
1071         case KVM_S390_VM_MIGRATION_STOP:
1072                 res = kvm_s390_vm_stop_migration(kvm);
1073                 break;
1074         default:
1075                 break;
1076         }
1077         mutex_unlock(&kvm->slots_lock);
1078
1079         return res;
1080 }
1081
1082 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083                                      struct kvm_device_attr *attr)
1084 {
1085         u64 mig = kvm->arch.migration_mode;
1086
1087         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088                 return -ENXIO;
1089
1090         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091                 return -EFAULT;
1092         return 0;
1093 }
1094
1095 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097         struct kvm_s390_vm_tod_clock gtod;
1098
1099         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1100                 return -EFAULT;
1101
1102         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1103                 return -EINVAL;
1104         kvm_s390_set_tod_clock(kvm, &gtod);
1105
1106         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1107                 gtod.epoch_idx, gtod.tod);
1108
1109         return 0;
1110 }
1111
1112 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1113 {
1114         u8 gtod_high;
1115
1116         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1117                                            sizeof(gtod_high)))
1118                 return -EFAULT;
1119
1120         if (gtod_high != 0)
1121                 return -EINVAL;
1122         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1123
1124         return 0;
1125 }
1126
1127 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1128 {
1129         struct kvm_s390_vm_tod_clock gtod = { 0 };
1130
1131         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1132                            sizeof(gtod.tod)))
1133                 return -EFAULT;
1134
1135         kvm_s390_set_tod_clock(kvm, &gtod);
1136         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1137         return 0;
1138 }
1139
1140 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1141 {
1142         int ret;
1143
1144         if (attr->flags)
1145                 return -EINVAL;
1146
1147         switch (attr->attr) {
1148         case KVM_S390_VM_TOD_EXT:
1149                 ret = kvm_s390_set_tod_ext(kvm, attr);
1150                 break;
1151         case KVM_S390_VM_TOD_HIGH:
1152                 ret = kvm_s390_set_tod_high(kvm, attr);
1153                 break;
1154         case KVM_S390_VM_TOD_LOW:
1155                 ret = kvm_s390_set_tod_low(kvm, attr);
1156                 break;
1157         default:
1158                 ret = -ENXIO;
1159                 break;
1160         }
1161         return ret;
1162 }
1163
1164 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1165                                    struct kvm_s390_vm_tod_clock *gtod)
1166 {
1167         struct kvm_s390_tod_clock_ext htod;
1168
1169         preempt_disable();
1170
1171         get_tod_clock_ext((char *)&htod);
1172
1173         gtod->tod = htod.tod + kvm->arch.epoch;
1174         gtod->epoch_idx = 0;
1175         if (test_kvm_facility(kvm, 139)) {
1176                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1177                 if (gtod->tod < htod.tod)
1178                         gtod->epoch_idx += 1;
1179         }
1180
1181         preempt_enable();
1182 }
1183
1184 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1185 {
1186         struct kvm_s390_vm_tod_clock gtod;
1187
1188         memset(&gtod, 0, sizeof(gtod));
1189         kvm_s390_get_tod_clock(kvm, &gtod);
1190         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1191                 return -EFAULT;
1192
1193         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1194                 gtod.epoch_idx, gtod.tod);
1195         return 0;
1196 }
1197
1198 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1199 {
1200         u8 gtod_high = 0;
1201
1202         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1203                                          sizeof(gtod_high)))
1204                 return -EFAULT;
1205         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1206
1207         return 0;
1208 }
1209
1210 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212         u64 gtod;
1213
1214         gtod = kvm_s390_get_tod_clock_fast(kvm);
1215         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1216                 return -EFAULT;
1217         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1218
1219         return 0;
1220 }
1221
1222 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1223 {
1224         int ret;
1225
1226         if (attr->flags)
1227                 return -EINVAL;
1228
1229         switch (attr->attr) {
1230         case KVM_S390_VM_TOD_EXT:
1231                 ret = kvm_s390_get_tod_ext(kvm, attr);
1232                 break;
1233         case KVM_S390_VM_TOD_HIGH:
1234                 ret = kvm_s390_get_tod_high(kvm, attr);
1235                 break;
1236         case KVM_S390_VM_TOD_LOW:
1237                 ret = kvm_s390_get_tod_low(kvm, attr);
1238                 break;
1239         default:
1240                 ret = -ENXIO;
1241                 break;
1242         }
1243         return ret;
1244 }
1245
1246 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247 {
1248         struct kvm_s390_vm_cpu_processor *proc;
1249         u16 lowest_ibc, unblocked_ibc;
1250         int ret = 0;
1251
1252         mutex_lock(&kvm->lock);
1253         if (kvm->created_vcpus) {
1254                 ret = -EBUSY;
1255                 goto out;
1256         }
1257         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1258         if (!proc) {
1259                 ret = -ENOMEM;
1260                 goto out;
1261         }
1262         if (!copy_from_user(proc, (void __user *)attr->addr,
1263                             sizeof(*proc))) {
1264                 kvm->arch.model.cpuid = proc->cpuid;
1265                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1266                 unblocked_ibc = sclp.ibc & 0xfff;
1267                 if (lowest_ibc && proc->ibc) {
1268                         if (proc->ibc > unblocked_ibc)
1269                                 kvm->arch.model.ibc = unblocked_ibc;
1270                         else if (proc->ibc < lowest_ibc)
1271                                 kvm->arch.model.ibc = lowest_ibc;
1272                         else
1273                                 kvm->arch.model.ibc = proc->ibc;
1274                 }
1275                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1276                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1277                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1278                          kvm->arch.model.ibc,
1279                          kvm->arch.model.cpuid);
1280                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1281                          kvm->arch.model.fac_list[0],
1282                          kvm->arch.model.fac_list[1],
1283                          kvm->arch.model.fac_list[2]);
1284         } else
1285                 ret = -EFAULT;
1286         kfree(proc);
1287 out:
1288         mutex_unlock(&kvm->lock);
1289         return ret;
1290 }
1291
1292 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1293                                        struct kvm_device_attr *attr)
1294 {
1295         struct kvm_s390_vm_cpu_feat data;
1296
1297         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1298                 return -EFAULT;
1299         if (!bitmap_subset((unsigned long *) data.feat,
1300                            kvm_s390_available_cpu_feat,
1301                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1302                 return -EINVAL;
1303
1304         mutex_lock(&kvm->lock);
1305         if (kvm->created_vcpus) {
1306                 mutex_unlock(&kvm->lock);
1307                 return -EBUSY;
1308         }
1309         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1310                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1311         mutex_unlock(&kvm->lock);
1312         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1313                          data.feat[0],
1314                          data.feat[1],
1315                          data.feat[2]);
1316         return 0;
1317 }
1318
1319 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1320                                           struct kvm_device_attr *attr)
1321 {
1322         mutex_lock(&kvm->lock);
1323         if (kvm->created_vcpus) {
1324                 mutex_unlock(&kvm->lock);
1325                 return -EBUSY;
1326         }
1327
1328         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1329                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1330                 mutex_unlock(&kvm->lock);
1331                 return -EFAULT;
1332         }
1333         mutex_unlock(&kvm->lock);
1334
1335         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1338                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1339                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1340         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1343         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1345                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1346         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1348                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1349         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1352         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1355         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1358         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1361         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1364         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1367         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1370         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1373         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1376         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1379         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1381                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1382         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1384                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1387         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1392
1393         return 0;
1394 }
1395
1396 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1397 {
1398         int ret = -ENXIO;
1399
1400         switch (attr->attr) {
1401         case KVM_S390_VM_CPU_PROCESSOR:
1402                 ret = kvm_s390_set_processor(kvm, attr);
1403                 break;
1404         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1405                 ret = kvm_s390_set_processor_feat(kvm, attr);
1406                 break;
1407         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1408                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1409                 break;
1410         }
1411         return ret;
1412 }
1413
1414 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1415 {
1416         struct kvm_s390_vm_cpu_processor *proc;
1417         int ret = 0;
1418
1419         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1420         if (!proc) {
1421                 ret = -ENOMEM;
1422                 goto out;
1423         }
1424         proc->cpuid = kvm->arch.model.cpuid;
1425         proc->ibc = kvm->arch.model.ibc;
1426         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1427                S390_ARCH_FAC_LIST_SIZE_BYTE);
1428         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1429                  kvm->arch.model.ibc,
1430                  kvm->arch.model.cpuid);
1431         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1432                  kvm->arch.model.fac_list[0],
1433                  kvm->arch.model.fac_list[1],
1434                  kvm->arch.model.fac_list[2]);
1435         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1436                 ret = -EFAULT;
1437         kfree(proc);
1438 out:
1439         return ret;
1440 }
1441
1442 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1443 {
1444         struct kvm_s390_vm_cpu_machine *mach;
1445         int ret = 0;
1446
1447         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1448         if (!mach) {
1449                 ret = -ENOMEM;
1450                 goto out;
1451         }
1452         get_cpu_id((struct cpuid *) &mach->cpuid);
1453         mach->ibc = sclp.ibc;
1454         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1455                S390_ARCH_FAC_LIST_SIZE_BYTE);
1456         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1457                sizeof(S390_lowcore.stfle_fac_list));
1458         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1459                  kvm->arch.model.ibc,
1460                  kvm->arch.model.cpuid);
1461         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1462                  mach->fac_mask[0],
1463                  mach->fac_mask[1],
1464                  mach->fac_mask[2]);
1465         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1466                  mach->fac_list[0],
1467                  mach->fac_list[1],
1468                  mach->fac_list[2]);
1469         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1470                 ret = -EFAULT;
1471         kfree(mach);
1472 out:
1473         return ret;
1474 }
1475
1476 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1477                                        struct kvm_device_attr *attr)
1478 {
1479         struct kvm_s390_vm_cpu_feat data;
1480
1481         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1482                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1483         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1484                 return -EFAULT;
1485         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1486                          data.feat[0],
1487                          data.feat[1],
1488                          data.feat[2]);
1489         return 0;
1490 }
1491
1492 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1493                                      struct kvm_device_attr *attr)
1494 {
1495         struct kvm_s390_vm_cpu_feat data;
1496
1497         bitmap_copy((unsigned long *) data.feat,
1498                     kvm_s390_available_cpu_feat,
1499                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1500         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1501                 return -EFAULT;
1502         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1503                          data.feat[0],
1504                          data.feat[1],
1505                          data.feat[2]);
1506         return 0;
1507 }
1508
1509 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1510                                           struct kvm_device_attr *attr)
1511 {
1512         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1513             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1514                 return -EFAULT;
1515
1516         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1521         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1524         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1527         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1530         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1533         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1536         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1539         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1542         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1545         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1548         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1551         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1554         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1557         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1560         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1563         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1568         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1573
1574         return 0;
1575 }
1576
1577 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1578                                         struct kvm_device_attr *attr)
1579 {
1580         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1581             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1582                 return -EFAULT;
1583
1584         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1585                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1587                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1588                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1589         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1590                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1591                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1592         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1593                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1594                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1595         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1596                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1597                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1598         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1599                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1600                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1601         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1604         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1605                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1606                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1607         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1608                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1609                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1610         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1611                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1612                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1613         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1614                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1616         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1617                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1619         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1620                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1621                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1622         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1623                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1625         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1628         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1630                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1631         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1632                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1633                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1634                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1635                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1636         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1637                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1639                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1640                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1641
1642         return 0;
1643 }
1644
1645 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1646 {
1647         int ret = -ENXIO;
1648
1649         switch (attr->attr) {
1650         case KVM_S390_VM_CPU_PROCESSOR:
1651                 ret = kvm_s390_get_processor(kvm, attr);
1652                 break;
1653         case KVM_S390_VM_CPU_MACHINE:
1654                 ret = kvm_s390_get_machine(kvm, attr);
1655                 break;
1656         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1657                 ret = kvm_s390_get_processor_feat(kvm, attr);
1658                 break;
1659         case KVM_S390_VM_CPU_MACHINE_FEAT:
1660                 ret = kvm_s390_get_machine_feat(kvm, attr);
1661                 break;
1662         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1663                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1664                 break;
1665         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1666                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1667                 break;
1668         }
1669         return ret;
1670 }
1671
1672 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1673 {
1674         int ret;
1675
1676         switch (attr->group) {
1677         case KVM_S390_VM_MEM_CTRL:
1678                 ret = kvm_s390_set_mem_control(kvm, attr);
1679                 break;
1680         case KVM_S390_VM_TOD:
1681                 ret = kvm_s390_set_tod(kvm, attr);
1682                 break;
1683         case KVM_S390_VM_CPU_MODEL:
1684                 ret = kvm_s390_set_cpu_model(kvm, attr);
1685                 break;
1686         case KVM_S390_VM_CRYPTO:
1687                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1688                 break;
1689         case KVM_S390_VM_MIGRATION:
1690                 ret = kvm_s390_vm_set_migration(kvm, attr);
1691                 break;
1692         default:
1693                 ret = -ENXIO;
1694                 break;
1695         }
1696
1697         return ret;
1698 }
1699
1700 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1701 {
1702         int ret;
1703
1704         switch (attr->group) {
1705         case KVM_S390_VM_MEM_CTRL:
1706                 ret = kvm_s390_get_mem_control(kvm, attr);
1707                 break;
1708         case KVM_S390_VM_TOD:
1709                 ret = kvm_s390_get_tod(kvm, attr);
1710                 break;
1711         case KVM_S390_VM_CPU_MODEL:
1712                 ret = kvm_s390_get_cpu_model(kvm, attr);
1713                 break;
1714         case KVM_S390_VM_MIGRATION:
1715                 ret = kvm_s390_vm_get_migration(kvm, attr);
1716                 break;
1717         default:
1718                 ret = -ENXIO;
1719                 break;
1720         }
1721
1722         return ret;
1723 }
1724
1725 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1726 {
1727         int ret;
1728
1729         switch (attr->group) {
1730         case KVM_S390_VM_MEM_CTRL:
1731                 switch (attr->attr) {
1732                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1733                 case KVM_S390_VM_MEM_CLR_CMMA:
1734                         ret = sclp.has_cmma ? 0 : -ENXIO;
1735                         break;
1736                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1737                         ret = 0;
1738                         break;
1739                 default:
1740                         ret = -ENXIO;
1741                         break;
1742                 }
1743                 break;
1744         case KVM_S390_VM_TOD:
1745                 switch (attr->attr) {
1746                 case KVM_S390_VM_TOD_LOW:
1747                 case KVM_S390_VM_TOD_HIGH:
1748                         ret = 0;
1749                         break;
1750                 default:
1751                         ret = -ENXIO;
1752                         break;
1753                 }
1754                 break;
1755         case KVM_S390_VM_CPU_MODEL:
1756                 switch (attr->attr) {
1757                 case KVM_S390_VM_CPU_PROCESSOR:
1758                 case KVM_S390_VM_CPU_MACHINE:
1759                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1760                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1761                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1762                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1763                         ret = 0;
1764                         break;
1765                 default:
1766                         ret = -ENXIO;
1767                         break;
1768                 }
1769                 break;
1770         case KVM_S390_VM_CRYPTO:
1771                 switch (attr->attr) {
1772                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1773                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1774                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1775                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1776                         ret = 0;
1777                         break;
1778                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1779                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1780                         ret = ap_instructions_available() ? 0 : -ENXIO;
1781                         break;
1782                 default:
1783                         ret = -ENXIO;
1784                         break;
1785                 }
1786                 break;
1787         case KVM_S390_VM_MIGRATION:
1788                 ret = 0;
1789                 break;
1790         default:
1791                 ret = -ENXIO;
1792                 break;
1793         }
1794
1795         return ret;
1796 }
1797
1798 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1799 {
1800         uint8_t *keys;
1801         uint64_t hva;
1802         int srcu_idx, i, r = 0;
1803
1804         if (args->flags != 0)
1805                 return -EINVAL;
1806
1807         /* Is this guest using storage keys? */
1808         if (!mm_uses_skeys(current->mm))
1809                 return KVM_S390_GET_SKEYS_NONE;
1810
1811         /* Enforce sane limit on memory allocation */
1812         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1813                 return -EINVAL;
1814
1815         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1816         if (!keys)
1817                 return -ENOMEM;
1818
1819         mmap_read_lock(current->mm);
1820         srcu_idx = srcu_read_lock(&kvm->srcu);
1821         for (i = 0; i < args->count; i++) {
1822                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1823                 if (kvm_is_error_hva(hva)) {
1824                         r = -EFAULT;
1825                         break;
1826                 }
1827
1828                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1829                 if (r)
1830                         break;
1831         }
1832         srcu_read_unlock(&kvm->srcu, srcu_idx);
1833         mmap_read_unlock(current->mm);
1834
1835         if (!r) {
1836                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1837                                  sizeof(uint8_t) * args->count);
1838                 if (r)
1839                         r = -EFAULT;
1840         }
1841
1842         kvfree(keys);
1843         return r;
1844 }
1845
1846 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1847 {
1848         uint8_t *keys;
1849         uint64_t hva;
1850         int srcu_idx, i, r = 0;
1851         bool unlocked;
1852
1853         if (args->flags != 0)
1854                 return -EINVAL;
1855
1856         /* Enforce sane limit on memory allocation */
1857         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1858                 return -EINVAL;
1859
1860         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1861         if (!keys)
1862                 return -ENOMEM;
1863
1864         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1865                            sizeof(uint8_t) * args->count);
1866         if (r) {
1867                 r = -EFAULT;
1868                 goto out;
1869         }
1870
1871         /* Enable storage key handling for the guest */
1872         r = s390_enable_skey();
1873         if (r)
1874                 goto out;
1875
1876         i = 0;
1877         mmap_read_lock(current->mm);
1878         srcu_idx = srcu_read_lock(&kvm->srcu);
1879         while (i < args->count) {
1880                 unlocked = false;
1881                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1882                 if (kvm_is_error_hva(hva)) {
1883                         r = -EFAULT;
1884                         break;
1885                 }
1886
1887                 /* Lowest order bit is reserved */
1888                 if (keys[i] & 0x01) {
1889                         r = -EINVAL;
1890                         break;
1891                 }
1892
1893                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1894                 if (r) {
1895                         r = fixup_user_fault(current->mm, hva,
1896                                              FAULT_FLAG_WRITE, &unlocked);
1897                         if (r)
1898                                 break;
1899                 }
1900                 if (!r)
1901                         i++;
1902         }
1903         srcu_read_unlock(&kvm->srcu, srcu_idx);
1904         mmap_read_unlock(current->mm);
1905 out:
1906         kvfree(keys);
1907         return r;
1908 }
1909
1910 /*
1911  * Base address and length must be sent at the start of each block, therefore
1912  * it's cheaper to send some clean data, as long as it's less than the size of
1913  * two longs.
1914  */
1915 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* Use the same upper bound as for storage keys, for consistency */
1917 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1918
1919 /*
1920  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1921  * address falls in a hole. In that case the index of one of the memslots
1922  * bordering the hole is returned.
1923  */
1924 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1925 {
1926         int start = 0, end = slots->used_slots;
1927         int slot = atomic_read(&slots->lru_slot);
1928         struct kvm_memory_slot *memslots = slots->memslots;
1929
1930         if (gfn >= memslots[slot].base_gfn &&
1931             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1932                 return slot;
1933
1934         while (start < end) {
1935                 slot = start + (end - start) / 2;
1936
1937                 if (gfn >= memslots[slot].base_gfn)
1938                         end = slot;
1939                 else
1940                         start = slot + 1;
1941         }
1942
1943         if (start >= slots->used_slots)
1944                 return slots->used_slots - 1;
1945
1946         if (gfn >= memslots[start].base_gfn &&
1947             gfn < memslots[start].base_gfn + memslots[start].npages) {
1948                 atomic_set(&slots->lru_slot, start);
1949         }
1950
1951         return start;
1952 }
1953
1954 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1955                               u8 *res, unsigned long bufsize)
1956 {
1957         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1958
1959         args->count = 0;
1960         while (args->count < bufsize) {
1961                 hva = gfn_to_hva(kvm, cur_gfn);
1962                 /*
1963                  * We return an error if the first value was invalid, but we
1964                  * return successfully if at least one value was copied.
1965                  */
1966                 if (kvm_is_error_hva(hva))
1967                         return args->count ? 0 : -EFAULT;
1968                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1969                         pgstev = 0;
1970                 res[args->count++] = (pgstev >> 24) & 0x43;
1971                 cur_gfn++;
1972         }
1973
1974         return 0;
1975 }
1976
1977 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1978                                               unsigned long cur_gfn)
1979 {
1980         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1981         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1982         unsigned long ofs = cur_gfn - ms->base_gfn;
1983
1984         if (ms->base_gfn + ms->npages <= cur_gfn) {
1985                 slotidx--;
1986                 /* If we are above the highest slot, wrap around */
1987                 if (slotidx < 0)
1988                         slotidx = slots->used_slots - 1;
1989
1990                 ms = slots->memslots + slotidx;
1991                 ofs = 0;
1992         }
1993         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1994         while ((slotidx > 0) && (ofs >= ms->npages)) {
1995                 slotidx--;
1996                 ms = slots->memslots + slotidx;
1997                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1998         }
1999         return ms->base_gfn + ofs;
2000 }
2001
2002 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2003                              u8 *res, unsigned long bufsize)
2004 {
2005         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2006         struct kvm_memslots *slots = kvm_memslots(kvm);
2007         struct kvm_memory_slot *ms;
2008
2009         if (unlikely(!slots->used_slots))
2010                 return 0;
2011
2012         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2013         ms = gfn_to_memslot(kvm, cur_gfn);
2014         args->count = 0;
2015         args->start_gfn = cur_gfn;
2016         if (!ms)
2017                 return 0;
2018         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2019         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2020
2021         while (args->count < bufsize) {
2022                 hva = gfn_to_hva(kvm, cur_gfn);
2023                 if (kvm_is_error_hva(hva))
2024                         return 0;
2025                 /* Decrement only if we actually flipped the bit to 0 */
2026                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2027                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2028                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2029                         pgstev = 0;
2030                 /* Save the value */
2031                 res[args->count++] = (pgstev >> 24) & 0x43;
2032                 /* If the next bit is too far away, stop. */
2033                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2034                         return 0;
2035                 /* If we reached the previous "next", find the next one */
2036                 if (cur_gfn == next_gfn)
2037                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2038                 /* Reached the end of memory or of the buffer, stop */
2039                 if ((next_gfn >= mem_end) ||
2040                     (next_gfn - args->start_gfn >= bufsize))
2041                         return 0;
2042                 cur_gfn++;
2043                 /* Reached the end of the current memslot, take the next one. */
2044                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2045                         ms = gfn_to_memslot(kvm, cur_gfn);
2046                         if (!ms)
2047                                 return 0;
2048                 }
2049         }
2050         return 0;
2051 }
2052
2053 /*
2054  * This function searches for the next page with dirty CMMA attributes, and
2055  * saves the attributes in the buffer up to either the end of the buffer or
2056  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2057  * no trailing clean bytes are saved.
2058  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2059  * output buffer will indicate 0 as length.
2060  */
2061 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2062                                   struct kvm_s390_cmma_log *args)
2063 {
2064         unsigned long bufsize;
2065         int srcu_idx, peek, ret;
2066         u8 *values;
2067
2068         if (!kvm->arch.use_cmma)
2069                 return -ENXIO;
2070         /* Invalid/unsupported flags were specified */
2071         if (args->flags & ~KVM_S390_CMMA_PEEK)
2072                 return -EINVAL;
2073         /* Migration mode query, and we are not doing a migration */
2074         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2075         if (!peek && !kvm->arch.migration_mode)
2076                 return -EINVAL;
2077         /* CMMA is disabled or was not used, or the buffer has length zero */
2078         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2079         if (!bufsize || !kvm->mm->context.uses_cmm) {
2080                 memset(args, 0, sizeof(*args));
2081                 return 0;
2082         }
2083         /* We are not peeking, and there are no dirty pages */
2084         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2085                 memset(args, 0, sizeof(*args));
2086                 return 0;
2087         }
2088
2089         values = vmalloc(bufsize);
2090         if (!values)
2091                 return -ENOMEM;
2092
2093         mmap_read_lock(kvm->mm);
2094         srcu_idx = srcu_read_lock(&kvm->srcu);
2095         if (peek)
2096                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2097         else
2098                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2099         srcu_read_unlock(&kvm->srcu, srcu_idx);
2100         mmap_read_unlock(kvm->mm);
2101
2102         if (kvm->arch.migration_mode)
2103                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2104         else
2105                 args->remaining = 0;
2106
2107         if (copy_to_user((void __user *)args->values, values, args->count))
2108                 ret = -EFAULT;
2109
2110         vfree(values);
2111         return ret;
2112 }
2113
2114 /*
2115  * This function sets the CMMA attributes for the given pages. If the input
2116  * buffer has zero length, no action is taken, otherwise the attributes are
2117  * set and the mm->context.uses_cmm flag is set.
2118  */
2119 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2120                                   const struct kvm_s390_cmma_log *args)
2121 {
2122         unsigned long hva, mask, pgstev, i;
2123         uint8_t *bits;
2124         int srcu_idx, r = 0;
2125
2126         mask = args->mask;
2127
2128         if (!kvm->arch.use_cmma)
2129                 return -ENXIO;
2130         /* invalid/unsupported flags */
2131         if (args->flags != 0)
2132                 return -EINVAL;
2133         /* Enforce sane limit on memory allocation */
2134         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2135                 return -EINVAL;
2136         /* Nothing to do */
2137         if (args->count == 0)
2138                 return 0;
2139
2140         bits = vmalloc(array_size(sizeof(*bits), args->count));
2141         if (!bits)
2142                 return -ENOMEM;
2143
2144         r = copy_from_user(bits, (void __user *)args->values, args->count);
2145         if (r) {
2146                 r = -EFAULT;
2147                 goto out;
2148         }
2149
2150         mmap_read_lock(kvm->mm);
2151         srcu_idx = srcu_read_lock(&kvm->srcu);
2152         for (i = 0; i < args->count; i++) {
2153                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2154                 if (kvm_is_error_hva(hva)) {
2155                         r = -EFAULT;
2156                         break;
2157                 }
2158
2159                 pgstev = bits[i];
2160                 pgstev = pgstev << 24;
2161                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2162                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2163         }
2164         srcu_read_unlock(&kvm->srcu, srcu_idx);
2165         mmap_read_unlock(kvm->mm);
2166
2167         if (!kvm->mm->context.uses_cmm) {
2168                 mmap_write_lock(kvm->mm);
2169                 kvm->mm->context.uses_cmm = 1;
2170                 mmap_write_unlock(kvm->mm);
2171         }
2172 out:
2173         vfree(bits);
2174         return r;
2175 }
2176
2177 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2178 {
2179         struct kvm_vcpu *vcpu;
2180         u16 rc, rrc;
2181         int ret = 0;
2182         int i;
2183
2184         /*
2185          * We ignore failures and try to destroy as many CPUs as possible.
2186          * At the same time we must not free the assigned resources when
2187          * this fails, as the ultravisor has still access to that memory.
2188          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2189          * behind.
2190          * We want to return the first failure rc and rrc, though.
2191          */
2192         kvm_for_each_vcpu(i, vcpu, kvm) {
2193                 mutex_lock(&vcpu->mutex);
2194                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2195                         *rcp = rc;
2196                         *rrcp = rrc;
2197                         ret = -EIO;
2198                 }
2199                 mutex_unlock(&vcpu->mutex);
2200         }
2201         return ret;
2202 }
2203
2204 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2205 {
2206         int i, r = 0;
2207         u16 dummy;
2208
2209         struct kvm_vcpu *vcpu;
2210
2211         kvm_for_each_vcpu(i, vcpu, kvm) {
2212                 mutex_lock(&vcpu->mutex);
2213                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2214                 mutex_unlock(&vcpu->mutex);
2215                 if (r)
2216                         break;
2217         }
2218         if (r)
2219                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2220         return r;
2221 }
2222
2223 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 {
2225         int r = 0;
2226         u16 dummy;
2227         void __user *argp = (void __user *)cmd->data;
2228
2229         switch (cmd->cmd) {
2230         case KVM_PV_ENABLE: {
2231                 r = -EINVAL;
2232                 if (kvm_s390_pv_is_protected(kvm))
2233                         break;
2234
2235                 /*
2236                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2237                  *  esca, we need no cleanup in the error cases below
2238                  */
2239                 r = sca_switch_to_extended(kvm);
2240                 if (r)
2241                         break;
2242
2243                 mmap_write_lock(current->mm);
2244                 r = gmap_mark_unmergeable();
2245                 mmap_write_unlock(current->mm);
2246                 if (r)
2247                         break;
2248
2249                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250                 if (r)
2251                         break;
2252
2253                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2254                 if (r)
2255                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2256
2257                 /* we need to block service interrupts from now on */
2258                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2259                 break;
2260         }
2261         case KVM_PV_DISABLE: {
2262                 r = -EINVAL;
2263                 if (!kvm_s390_pv_is_protected(kvm))
2264                         break;
2265
2266                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2267                 /*
2268                  * If a CPU could not be destroyed, destroy VM will also fail.
2269                  * There is no point in trying to destroy it. Instead return
2270                  * the rc and rrc from the first CPU that failed destroying.
2271                  */
2272                 if (r)
2273                         break;
2274                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2275
2276                 /* no need to block service interrupts any more */
2277                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278                 break;
2279         }
2280         case KVM_PV_SET_SEC_PARMS: {
2281                 struct kvm_s390_pv_sec_parm parms = {};
2282                 void *hdr;
2283
2284                 r = -EINVAL;
2285                 if (!kvm_s390_pv_is_protected(kvm))
2286                         break;
2287
2288                 r = -EFAULT;
2289                 if (copy_from_user(&parms, argp, sizeof(parms)))
2290                         break;
2291
2292                 /* Currently restricted to 8KB */
2293                 r = -EINVAL;
2294                 if (parms.length > PAGE_SIZE * 2)
2295                         break;
2296
2297                 r = -ENOMEM;
2298                 hdr = vmalloc(parms.length);
2299                 if (!hdr)
2300                         break;
2301
2302                 r = -EFAULT;
2303                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2304                                     parms.length))
2305                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2306                                                       &cmd->rc, &cmd->rrc);
2307
2308                 vfree(hdr);
2309                 break;
2310         }
2311         case KVM_PV_UNPACK: {
2312                 struct kvm_s390_pv_unp unp = {};
2313
2314                 r = -EINVAL;
2315                 if (!kvm_s390_pv_is_protected(kvm))
2316                         break;
2317
2318                 r = -EFAULT;
2319                 if (copy_from_user(&unp, argp, sizeof(unp)))
2320                         break;
2321
2322                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2323                                        &cmd->rc, &cmd->rrc);
2324                 break;
2325         }
2326         case KVM_PV_VERIFY: {
2327                 r = -EINVAL;
2328                 if (!kvm_s390_pv_is_protected(kvm))
2329                         break;
2330
2331                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2332                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2333                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2334                              cmd->rrc);
2335                 break;
2336         }
2337         case KVM_PV_PREP_RESET: {
2338                 r = -EINVAL;
2339                 if (!kvm_s390_pv_is_protected(kvm))
2340                         break;
2341
2342                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2343                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2344                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2345                              cmd->rc, cmd->rrc);
2346                 break;
2347         }
2348         case KVM_PV_UNSHARE_ALL: {
2349                 r = -EINVAL;
2350                 if (!kvm_s390_pv_is_protected(kvm))
2351                         break;
2352
2353                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2354                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2355                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2356                              cmd->rc, cmd->rrc);
2357                 break;
2358         }
2359         default:
2360                 r = -ENOTTY;
2361         }
2362         return r;
2363 }
2364
2365 long kvm_arch_vm_ioctl(struct file *filp,
2366                        unsigned int ioctl, unsigned long arg)
2367 {
2368         struct kvm *kvm = filp->private_data;
2369         void __user *argp = (void __user *)arg;
2370         struct kvm_device_attr attr;
2371         int r;
2372
2373         switch (ioctl) {
2374         case KVM_S390_INTERRUPT: {
2375                 struct kvm_s390_interrupt s390int;
2376
2377                 r = -EFAULT;
2378                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2379                         break;
2380                 r = kvm_s390_inject_vm(kvm, &s390int);
2381                 break;
2382         }
2383         case KVM_CREATE_IRQCHIP: {
2384                 struct kvm_irq_routing_entry routing;
2385
2386                 r = -EINVAL;
2387                 if (kvm->arch.use_irqchip) {
2388                         /* Set up dummy routing. */
2389                         memset(&routing, 0, sizeof(routing));
2390                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2391                 }
2392                 break;
2393         }
2394         case KVM_SET_DEVICE_ATTR: {
2395                 r = -EFAULT;
2396                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2397                         break;
2398                 r = kvm_s390_vm_set_attr(kvm, &attr);
2399                 break;
2400         }
2401         case KVM_GET_DEVICE_ATTR: {
2402                 r = -EFAULT;
2403                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2404                         break;
2405                 r = kvm_s390_vm_get_attr(kvm, &attr);
2406                 break;
2407         }
2408         case KVM_HAS_DEVICE_ATTR: {
2409                 r = -EFAULT;
2410                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2411                         break;
2412                 r = kvm_s390_vm_has_attr(kvm, &attr);
2413                 break;
2414         }
2415         case KVM_S390_GET_SKEYS: {
2416                 struct kvm_s390_skeys args;
2417
2418                 r = -EFAULT;
2419                 if (copy_from_user(&args, argp,
2420                                    sizeof(struct kvm_s390_skeys)))
2421                         break;
2422                 r = kvm_s390_get_skeys(kvm, &args);
2423                 break;
2424         }
2425         case KVM_S390_SET_SKEYS: {
2426                 struct kvm_s390_skeys args;
2427
2428                 r = -EFAULT;
2429                 if (copy_from_user(&args, argp,
2430                                    sizeof(struct kvm_s390_skeys)))
2431                         break;
2432                 r = kvm_s390_set_skeys(kvm, &args);
2433                 break;
2434         }
2435         case KVM_S390_GET_CMMA_BITS: {
2436                 struct kvm_s390_cmma_log args;
2437
2438                 r = -EFAULT;
2439                 if (copy_from_user(&args, argp, sizeof(args)))
2440                         break;
2441                 mutex_lock(&kvm->slots_lock);
2442                 r = kvm_s390_get_cmma_bits(kvm, &args);
2443                 mutex_unlock(&kvm->slots_lock);
2444                 if (!r) {
2445                         r = copy_to_user(argp, &args, sizeof(args));
2446                         if (r)
2447                                 r = -EFAULT;
2448                 }
2449                 break;
2450         }
2451         case KVM_S390_SET_CMMA_BITS: {
2452                 struct kvm_s390_cmma_log args;
2453
2454                 r = -EFAULT;
2455                 if (copy_from_user(&args, argp, sizeof(args)))
2456                         break;
2457                 mutex_lock(&kvm->slots_lock);
2458                 r = kvm_s390_set_cmma_bits(kvm, &args);
2459                 mutex_unlock(&kvm->slots_lock);
2460                 break;
2461         }
2462         case KVM_S390_PV_COMMAND: {
2463                 struct kvm_pv_cmd args;
2464
2465                 /* protvirt means user sigp */
2466                 kvm->arch.user_cpu_state_ctrl = 1;
2467                 r = 0;
2468                 if (!is_prot_virt_host()) {
2469                         r = -EINVAL;
2470                         break;
2471                 }
2472                 if (copy_from_user(&args, argp, sizeof(args))) {
2473                         r = -EFAULT;
2474                         break;
2475                 }
2476                 if (args.flags) {
2477                         r = -EINVAL;
2478                         break;
2479                 }
2480                 mutex_lock(&kvm->lock);
2481                 r = kvm_s390_handle_pv(kvm, &args);
2482                 mutex_unlock(&kvm->lock);
2483                 if (copy_to_user(argp, &args, sizeof(args))) {
2484                         r = -EFAULT;
2485                         break;
2486                 }
2487                 break;
2488         }
2489         default:
2490                 r = -ENOTTY;
2491         }
2492
2493         return r;
2494 }
2495
2496 static int kvm_s390_apxa_installed(void)
2497 {
2498         struct ap_config_info info;
2499
2500         if (ap_instructions_available()) {
2501                 if (ap_qci(&info) == 0)
2502                         return info.apxa;
2503         }
2504
2505         return 0;
2506 }
2507
2508 /*
2509  * The format of the crypto control block (CRYCB) is specified in the 3 low
2510  * order bits of the CRYCB designation (CRYCBD) field as follows:
2511  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2512  *           AP extended addressing (APXA) facility are installed.
2513  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2514  * Format 2: Both the APXA and MSAX3 facilities are installed
2515  */
2516 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2517 {
2518         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2519
2520         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2521         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2522
2523         /* Check whether MSAX3 is installed */
2524         if (!test_kvm_facility(kvm, 76))
2525                 return;
2526
2527         if (kvm_s390_apxa_installed())
2528                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2529         else
2530                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2531 }
2532
2533 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2534                                unsigned long *aqm, unsigned long *adm)
2535 {
2536         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2537
2538         mutex_lock(&kvm->lock);
2539         kvm_s390_vcpu_block_all(kvm);
2540
2541         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2542         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2543                 memcpy(crycb->apcb1.apm, apm, 32);
2544                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2545                          apm[0], apm[1], apm[2], apm[3]);
2546                 memcpy(crycb->apcb1.aqm, aqm, 32);
2547                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2548                          aqm[0], aqm[1], aqm[2], aqm[3]);
2549                 memcpy(crycb->apcb1.adm, adm, 32);
2550                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2551                          adm[0], adm[1], adm[2], adm[3]);
2552                 break;
2553         case CRYCB_FORMAT1:
2554         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2555                 memcpy(crycb->apcb0.apm, apm, 8);
2556                 memcpy(crycb->apcb0.aqm, aqm, 2);
2557                 memcpy(crycb->apcb0.adm, adm, 2);
2558                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2559                          apm[0], *((unsigned short *)aqm),
2560                          *((unsigned short *)adm));
2561                 break;
2562         default:        /* Can not happen */
2563                 break;
2564         }
2565
2566         /* recreate the shadow crycb for each vcpu */
2567         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2568         kvm_s390_vcpu_unblock_all(kvm);
2569         mutex_unlock(&kvm->lock);
2570 }
2571 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2572
2573 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2574 {
2575         mutex_lock(&kvm->lock);
2576         kvm_s390_vcpu_block_all(kvm);
2577
2578         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2579                sizeof(kvm->arch.crypto.crycb->apcb0));
2580         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2581                sizeof(kvm->arch.crypto.crycb->apcb1));
2582
2583         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2584         /* recreate the shadow crycb for each vcpu */
2585         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2586         kvm_s390_vcpu_unblock_all(kvm);
2587         mutex_unlock(&kvm->lock);
2588 }
2589 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2590
2591 static u64 kvm_s390_get_initial_cpuid(void)
2592 {
2593         struct cpuid cpuid;
2594
2595         get_cpu_id(&cpuid);
2596         cpuid.version = 0xff;
2597         return *((u64 *) &cpuid);
2598 }
2599
2600 static void kvm_s390_crypto_init(struct kvm *kvm)
2601 {
2602         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2603         kvm_s390_set_crycb_format(kvm);
2604
2605         if (!test_kvm_facility(kvm, 76))
2606                 return;
2607
2608         /* Enable AES/DEA protected key functions by default */
2609         kvm->arch.crypto.aes_kw = 1;
2610         kvm->arch.crypto.dea_kw = 1;
2611         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2612                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2613         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2614                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2615 }
2616
2617 static void sca_dispose(struct kvm *kvm)
2618 {
2619         if (kvm->arch.use_esca)
2620                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2621         else
2622                 free_page((unsigned long)(kvm->arch.sca));
2623         kvm->arch.sca = NULL;
2624 }
2625
2626 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2627 {
2628         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2629         int i, rc;
2630         char debug_name[16];
2631         static unsigned long sca_offset;
2632
2633         rc = -EINVAL;
2634 #ifdef CONFIG_KVM_S390_UCONTROL
2635         if (type & ~KVM_VM_S390_UCONTROL)
2636                 goto out_err;
2637         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2638                 goto out_err;
2639 #else
2640         if (type)
2641                 goto out_err;
2642 #endif
2643
2644         rc = s390_enable_sie();
2645         if (rc)
2646                 goto out_err;
2647
2648         rc = -ENOMEM;
2649
2650         if (!sclp.has_64bscao)
2651                 alloc_flags |= GFP_DMA;
2652         rwlock_init(&kvm->arch.sca_lock);
2653         /* start with basic SCA */
2654         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2655         if (!kvm->arch.sca)
2656                 goto out_err;
2657         mutex_lock(&kvm_lock);
2658         sca_offset += 16;
2659         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2660                 sca_offset = 0;
2661         kvm->arch.sca = (struct bsca_block *)
2662                         ((char *) kvm->arch.sca + sca_offset);
2663         mutex_unlock(&kvm_lock);
2664
2665         sprintf(debug_name, "kvm-%u", current->pid);
2666
2667         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2668         if (!kvm->arch.dbf)
2669                 goto out_err;
2670
2671         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2672         kvm->arch.sie_page2 =
2673              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2674         if (!kvm->arch.sie_page2)
2675                 goto out_err;
2676
2677         kvm->arch.sie_page2->kvm = kvm;
2678         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2679
2680         for (i = 0; i < kvm_s390_fac_size(); i++) {
2681                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2682                                               (kvm_s390_fac_base[i] |
2683                                                kvm_s390_fac_ext[i]);
2684                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2685                                               kvm_s390_fac_base[i];
2686         }
2687         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2688
2689         /* we are always in czam mode - even on pre z14 machines */
2690         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2691         set_kvm_facility(kvm->arch.model.fac_list, 138);
2692         /* we emulate STHYI in kvm */
2693         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2694         set_kvm_facility(kvm->arch.model.fac_list, 74);
2695         if (MACHINE_HAS_TLB_GUEST) {
2696                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2697                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2698         }
2699
2700         if (css_general_characteristics.aiv && test_facility(65))
2701                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2702
2703         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2704         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2705
2706         kvm_s390_crypto_init(kvm);
2707
2708         mutex_init(&kvm->arch.float_int.ais_lock);
2709         spin_lock_init(&kvm->arch.float_int.lock);
2710         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2711                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2712         init_waitqueue_head(&kvm->arch.ipte_wq);
2713         mutex_init(&kvm->arch.ipte_mutex);
2714
2715         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2716         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2717
2718         if (type & KVM_VM_S390_UCONTROL) {
2719                 kvm->arch.gmap = NULL;
2720                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2721         } else {
2722                 if (sclp.hamax == U64_MAX)
2723                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2724                 else
2725                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2726                                                     sclp.hamax + 1);
2727                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2728                 if (!kvm->arch.gmap)
2729                         goto out_err;
2730                 kvm->arch.gmap->private = kvm;
2731                 kvm->arch.gmap->pfault_enabled = 0;
2732         }
2733
2734         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2735         kvm->arch.use_skf = sclp.has_skey;
2736         spin_lock_init(&kvm->arch.start_stop_lock);
2737         kvm_s390_vsie_init(kvm);
2738         if (use_gisa)
2739                 kvm_s390_gisa_init(kvm);
2740         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2741
2742         return 0;
2743 out_err:
2744         free_page((unsigned long)kvm->arch.sie_page2);
2745         debug_unregister(kvm->arch.dbf);
2746         sca_dispose(kvm);
2747         KVM_EVENT(3, "creation of vm failed: %d", rc);
2748         return rc;
2749 }
2750
2751 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2752 {
2753         u16 rc, rrc;
2754
2755         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2756         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2757         kvm_s390_clear_local_irqs(vcpu);
2758         kvm_clear_async_pf_completion_queue(vcpu);
2759         if (!kvm_is_ucontrol(vcpu->kvm))
2760                 sca_del_vcpu(vcpu);
2761
2762         if (kvm_is_ucontrol(vcpu->kvm))
2763                 gmap_remove(vcpu->arch.gmap);
2764
2765         if (vcpu->kvm->arch.use_cmma)
2766                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2767         /* We can not hold the vcpu mutex here, we are already dying */
2768         if (kvm_s390_pv_cpu_get_handle(vcpu))
2769                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2770         free_page((unsigned long)(vcpu->arch.sie_block));
2771 }
2772
2773 static void kvm_free_vcpus(struct kvm *kvm)
2774 {
2775         unsigned int i;
2776         struct kvm_vcpu *vcpu;
2777
2778         kvm_for_each_vcpu(i, vcpu, kvm)
2779                 kvm_vcpu_destroy(vcpu);
2780
2781         mutex_lock(&kvm->lock);
2782         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2783                 kvm->vcpus[i] = NULL;
2784
2785         atomic_set(&kvm->online_vcpus, 0);
2786         mutex_unlock(&kvm->lock);
2787 }
2788
2789 void kvm_arch_destroy_vm(struct kvm *kvm)
2790 {
2791         u16 rc, rrc;
2792
2793         kvm_free_vcpus(kvm);
2794         sca_dispose(kvm);
2795         kvm_s390_gisa_destroy(kvm);
2796         /*
2797          * We are already at the end of life and kvm->lock is not taken.
2798          * This is ok as the file descriptor is closed by now and nobody
2799          * can mess with the pv state. To avoid lockdep_assert_held from
2800          * complaining we do not use kvm_s390_pv_is_protected.
2801          */
2802         if (kvm_s390_pv_get_handle(kvm))
2803                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2804         debug_unregister(kvm->arch.dbf);
2805         free_page((unsigned long)kvm->arch.sie_page2);
2806         if (!kvm_is_ucontrol(kvm))
2807                 gmap_remove(kvm->arch.gmap);
2808         kvm_s390_destroy_adapters(kvm);
2809         kvm_s390_clear_float_irqs(kvm);
2810         kvm_s390_vsie_destroy(kvm);
2811         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2812 }
2813
2814 /* Section: vcpu related */
2815 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2816 {
2817         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2818         if (!vcpu->arch.gmap)
2819                 return -ENOMEM;
2820         vcpu->arch.gmap->private = vcpu->kvm;
2821
2822         return 0;
2823 }
2824
2825 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2826 {
2827         if (!kvm_s390_use_sca_entries())
2828                 return;
2829         read_lock(&vcpu->kvm->arch.sca_lock);
2830         if (vcpu->kvm->arch.use_esca) {
2831                 struct esca_block *sca = vcpu->kvm->arch.sca;
2832
2833                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2834                 sca->cpu[vcpu->vcpu_id].sda = 0;
2835         } else {
2836                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2837
2838                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2839                 sca->cpu[vcpu->vcpu_id].sda = 0;
2840         }
2841         read_unlock(&vcpu->kvm->arch.sca_lock);
2842 }
2843
2844 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2845 {
2846         if (!kvm_s390_use_sca_entries()) {
2847                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2848
2849                 /* we still need the basic sca for the ipte control */
2850                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2851                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2852                 return;
2853         }
2854         read_lock(&vcpu->kvm->arch.sca_lock);
2855         if (vcpu->kvm->arch.use_esca) {
2856                 struct esca_block *sca = vcpu->kvm->arch.sca;
2857
2858                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2859                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2860                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2861                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2862                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863         } else {
2864                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2865
2866                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2867                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2868                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2869                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2870         }
2871         read_unlock(&vcpu->kvm->arch.sca_lock);
2872 }
2873
2874 /* Basic SCA to Extended SCA data copy routines */
2875 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2876 {
2877         d->sda = s->sda;
2878         d->sigp_ctrl.c = s->sigp_ctrl.c;
2879         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2880 }
2881
2882 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2883 {
2884         int i;
2885
2886         d->ipte_control = s->ipte_control;
2887         d->mcn[0] = s->mcn;
2888         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2889                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2890 }
2891
2892 static int sca_switch_to_extended(struct kvm *kvm)
2893 {
2894         struct bsca_block *old_sca = kvm->arch.sca;
2895         struct esca_block *new_sca;
2896         struct kvm_vcpu *vcpu;
2897         unsigned int vcpu_idx;
2898         u32 scaol, scaoh;
2899
2900         if (kvm->arch.use_esca)
2901                 return 0;
2902
2903         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2904         if (!new_sca)
2905                 return -ENOMEM;
2906
2907         scaoh = (u32)((u64)(new_sca) >> 32);
2908         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2909
2910         kvm_s390_vcpu_block_all(kvm);
2911         write_lock(&kvm->arch.sca_lock);
2912
2913         sca_copy_b_to_e(new_sca, old_sca);
2914
2915         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2916                 vcpu->arch.sie_block->scaoh = scaoh;
2917                 vcpu->arch.sie_block->scaol = scaol;
2918                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2919         }
2920         kvm->arch.sca = new_sca;
2921         kvm->arch.use_esca = 1;
2922
2923         write_unlock(&kvm->arch.sca_lock);
2924         kvm_s390_vcpu_unblock_all(kvm);
2925
2926         free_page((unsigned long)old_sca);
2927
2928         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2929                  old_sca, kvm->arch.sca);
2930         return 0;
2931 }
2932
2933 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2934 {
2935         int rc;
2936
2937         if (!kvm_s390_use_sca_entries()) {
2938                 if (id < KVM_MAX_VCPUS)
2939                         return true;
2940                 return false;
2941         }
2942         if (id < KVM_S390_BSCA_CPU_SLOTS)
2943                 return true;
2944         if (!sclp.has_esca || !sclp.has_64bscao)
2945                 return false;
2946
2947         mutex_lock(&kvm->lock);
2948         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2949         mutex_unlock(&kvm->lock);
2950
2951         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2952 }
2953
2954 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2955 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2956 {
2957         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2958         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2959         vcpu->arch.cputm_start = get_tod_clock_fast();
2960         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2961 }
2962
2963 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2964 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2965 {
2966         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2967         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2968         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2969         vcpu->arch.cputm_start = 0;
2970         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2971 }
2972
2973 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975 {
2976         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2977         vcpu->arch.cputm_enabled = true;
2978         __start_cpu_timer_accounting(vcpu);
2979 }
2980
2981 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2982 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2983 {
2984         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2985         __stop_cpu_timer_accounting(vcpu);
2986         vcpu->arch.cputm_enabled = false;
2987 }
2988
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
2995
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3002
3003 /* set the cpu timer - may only be called from the VCPU thread itself */
3004 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3005 {
3006         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3007         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3008         if (vcpu->arch.cputm_enabled)
3009                 vcpu->arch.cputm_start = get_tod_clock_fast();
3010         vcpu->arch.sie_block->cputm = cputm;
3011         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3012         preempt_enable();
3013 }
3014
3015 /* update and get the cpu timer - can also be called from other VCPU threads */
3016 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3017 {
3018         unsigned int seq;
3019         __u64 value;
3020
3021         if (unlikely(!vcpu->arch.cputm_enabled))
3022                 return vcpu->arch.sie_block->cputm;
3023
3024         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3025         do {
3026                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3027                 /*
3028                  * If the writer would ever execute a read in the critical
3029                  * section, e.g. in irq context, we have a deadlock.
3030                  */
3031                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3032                 value = vcpu->arch.sie_block->cputm;
3033                 /* if cputm_start is 0, accounting is being started/stopped */
3034                 if (likely(vcpu->arch.cputm_start))
3035                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3036         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3037         preempt_enable();
3038         return value;
3039 }
3040
     /*
      * Load-side arch hook for @vcpu on host CPU @cpu: enables the gmap stored in
      * arch.enabled_gmap, sets CPUSTAT_RUNNING, starts CPU timer accounting when
      * it is enabled and the VCPU is not idle, and finally records @cpu in
      * vcpu->cpu.
      */
3041 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3042 {
3043
3044         gmap_enable(vcpu->arch.enabled_gmap);
3045         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3046         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3047                 __start_cpu_timer_accounting(vcpu);
3048         vcpu->cpu = cpu;
3049 }
3050
     /*
      * Put-side counterpart of kvm_arch_vcpu_load(): the steps are undone in
      * reverse order. vcpu->cpu is set to -1, CPU timer accounting is stopped
      * (if it is enabled and the VCPU is not idle), CPUSTAT_RUNNING is cleared
      * and the currently enabled gmap is remembered in arch.enabled_gmap before
      * it is disabled, so the next load re-enables the same gmap.
      */
3051 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3052 {
3053         vcpu->cpu = -1;
3054         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3055                 __stop_cpu_timer_accounting(vcpu);
3056         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3057         vcpu->arch.enabled_gmap = gmap_get_enabled();
3058         gmap_disable(vcpu->arch.enabled_gmap);
3059
3060 }
3061
     /*
      * Finish VCPU creation: copy the VM-wide epoch/epdx into the VCPU's SIE
      * block, attach the VM gmap and add the VCPU to the SCA (non-ucontrol VMs
      * only), and enable operation exception interception when facility 74 or
      * user_instr0 is set.
      */
3062 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3063 {
             /* epoch and epdx are copied under kvm->lock with preemption disabled */
3064         mutex_lock(&vcpu->kvm->lock);
3065         preempt_disable();
3066         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3067         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3068         preempt_enable();
3069         mutex_unlock(&vcpu->kvm->lock);
3070         if (!kvm_is_ucontrol(vcpu->kvm)) {
3071                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3072                 sca_add_vcpu(vcpu);
3073         }
3074         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3075                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3076         /* make vcpu_load load the right gmap on the first trigger */
3077         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3078 }
3079
3080 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3081 {
3082         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3083             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3084                 return true;
3085         return false;
3086 }
3087
3088 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3089 {
3090         /* At least one ECC subfunction must be present */
3091         return kvm_has_pckmo_subfunc(kvm, 32) ||
3092                kvm_has_pckmo_subfunc(kvm, 33) ||
3093                kvm_has_pckmo_subfunc(kvm, 34) ||
3094                kvm_has_pckmo_subfunc(kvm, 40) ||
3095                kvm_has_pckmo_subfunc(kvm, 41);
3096
3097 }
3098
3099 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3100 {
3101         /*
3102          * If the AP instructions are not being interpreted and the MSAX3
3103          * facility is not configured for the guest, there is nothing to set up.
3104          */
3105         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3106                 return;
3107
3108         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3109         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3110         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3111         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3112
3113         if (vcpu->kvm->arch.crypto.apie)
3114                 vcpu->arch.sie_block->eca |= ECA_APIE;
3115
3116         /* Set up protected key support */
3117         if (vcpu->kvm->arch.crypto.aes_kw) {
3118                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3119                 /* ecc is also wrapped with AES key */
3120                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3121                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3122         }
3123
3124         if (vcpu->kvm->arch.crypto.dea_kw)
3125                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3126 }
3127
3128 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3129 {
3130         free_page(vcpu->arch.sie_block->cbrlo);
3131         vcpu->arch.sie_block->cbrlo = 0;
3132 }
3133
3134 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3135 {
3136         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3137         if (!vcpu->arch.sie_block->cbrlo)
3138                 return -ENOMEM;
3139         return 0;
3140 }
3141
3142 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3143 {
3144         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3145
3146         vcpu->arch.sie_block->ibc = model->ibc;
3147         if (test_kvm_facility(vcpu->kvm, 7))
3148                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3149 }
3150
     /*
      * Initialize the SIE control block of a freshly created VCPU.
      *
      * The execution controls (cpuflags, ecb, ecb2, eca, ecd, ictl) are derived
      * from the facilities of the VM and from machine capabilities reported in
      * sclp. Afterwards CMMA (if used by the VM), the clock comparator hrtimer,
      * the crypto controls and - for protected VMs - the protected VCPU are set
      * up.
      *
      * Return: 0 on success, otherwise the error returned by
      * kvm_s390_vcpu_setup_cmma() or kvm_s390_pv_create_cpu().
      */
3151 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3152 {
3153         int rc = 0;
3154         u16 uvrc, uvrrc;
3155
             /* every VCPU starts out in the stopped state */
3156         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3157                                                     CPUSTAT_SM |
3158                                                     CPUSTAT_STOPPED);
3159
             /* facility 78 takes precedence over facility 8 (GED2 vs. GED) */
3160         if (test_kvm_facility(vcpu->kvm, 78))
3161                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3162         else if (test_kvm_facility(vcpu->kvm, 8))
3163                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3164
3165         kvm_s390_vcpu_setup_model(vcpu);
3166
3167         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3168         if (MACHINE_HAS_ESOP)
3169                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3170         if (test_kvm_facility(vcpu->kvm, 9))
3171                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3172         if (test_kvm_facility(vcpu->kvm, 73))
3173                 vcpu->arch.sie_block->ecb |= ECB_TE;
3174
3175         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3176                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3177         if (test_kvm_facility(vcpu->kvm, 130))
3178                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
             /* eca is assigned (not or-ed) here; the sclp-dependent bits follow */
3179         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3180         if (sclp.has_cei)
3181                 vcpu->arch.sie_block->eca |= ECA_CEI;
3182         if (sclp.has_ib)
3183                 vcpu->arch.sie_block->eca |= ECA_IB;
3184         if (sclp.has_siif)
3185                 vcpu->arch.sie_block->eca |= ECA_SII;
3186         if (sclp.has_sigpif)
3187                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3188         if (test_kvm_facility(vcpu->kvm, 129)) {
3189                 vcpu->arch.sie_block->eca |= ECA_VX;
3190                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3191         }
3192         if (test_kvm_facility(vcpu->kvm, 139))
3193                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3194         if (test_kvm_facility(vcpu->kvm, 156))
3195                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
             /* a non-zero gd (GISA designation) enables ECA_AIV */
3196         if (vcpu->arch.sie_block->gd) {
3197                 vcpu->arch.sie_block->eca |= ECA_AIV;
3198                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3199                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3200         }
             /* sdnx and riccb live in the sync regs area of the kvm_run structure */
3201         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3202                                         | SDNXC;
3203         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3204
             /* without KSS, intercept the ISKE/SSKE/RRBE instructions instead */
3205         if (sclp.has_kss)
3206                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3207         else
3208                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3209
3210         if (vcpu->kvm->arch.use_cmma) {
3211                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3212                 if (rc)
3213                         return rc;
3214         }
3215         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3216         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3217
3218         vcpu->arch.sie_block->hpid = HPID_KVM;
3219
3220         kvm_s390_vcpu_crypto_setup(vcpu);
3221
3222         mutex_lock(&vcpu->kvm->lock);
3223         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3224                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
                     /*
                      * Roll back the CMMA setup from above on failure.
                      * NOTE(review): this also runs when use_cmma is off; cbrlo
                      * is presumably still 0 then - confirm.
                      */
3225                 if (rc)
3226                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3227         }
3228         mutex_unlock(&vcpu->kvm->lock);
3229
3230         return rc;
3231 }
3232
3233 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3234 {
3235         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3236                 return -EINVAL;
3237         return 0;
3238 }
3239
     /*
      * Arch part of VCPU creation.
      *
      * Allocates one zeroed, memcg-accounted page holding the struct sie_page,
      * fills in the initial SIE block values, advertises the sync regs that are
      * valid for this VM in kvm_run, performs the ucontrol specific init and
      * finally calls kvm_s390_vcpu_setup().
      *
      * Return: 0 on success, -ENOMEM if the page allocation fails, otherwise the
      * error of __kvm_ucontrol_vcpu_init() or kvm_s390_vcpu_setup(). The page is
      * freed again on every error path.
      */
3240 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3241 {
3242         struct sie_page *sie_page;
3243         int rc;
3244
             /* the sie_page must fill exactly the single page allocated below */
3245         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3246         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3247         if (!sie_page)
3248                 return -ENOMEM;
3249
3250         vcpu->arch.sie_block = &sie_page->sie_block;
3251         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3252
3253         /* the real guest size will always be smaller than msl */
3254         vcpu->arch.sie_block->mso = 0;
3255         vcpu->arch.sie_block->msl = sclp.hamax;
3256
3257         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3258         spin_lock_init(&vcpu->arch.local_int.lock);
             /* gd holds the truncated GISA origin, tagged as format-1 if available */
3259         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3260         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3261                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3262         seqcount_init(&vcpu->arch.cputm_seqcount);
3263
3264         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3265         kvm_clear_async_pf_completion_queue(vcpu);
             /* sync regs that are valid for every VM */
3266         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3267                                     KVM_SYNC_GPRS |
3268                                     KVM_SYNC_ACRS |
3269                                     KVM_SYNC_CRS |
3270                                     KVM_SYNC_ARCH0 |
3271                                     KVM_SYNC_PFAULT |
3272                                     KVM_SYNC_DIAG318;
3273         kvm_s390_set_prefix(vcpu, 0);
             /* facility dependent sync regs */
3274         if (test_kvm_facility(vcpu->kvm, 64))
3275                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3276         if (test_kvm_facility(vcpu->kvm, 82))
3277                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3278         if (test_kvm_facility(vcpu->kvm, 133))
3279                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3280         if (test_kvm_facility(vcpu->kvm, 156))
3281                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3282         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3283          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3284          */
3285         if (MACHINE_HAS_VX)
3286                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3287         else
3288                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3289
3290         if (kvm_is_ucontrol(vcpu->kvm)) {
3291                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3292                 if (rc)
3293                         goto out_free_sie_block;
3294         }
3295
3296         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3297                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3298         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3299
3300         rc = kvm_s390_vcpu_setup(vcpu);
3301         if (rc)
3302                 goto out_ucontrol_uninit;
3303         return 0;
3304
3305 out_ucontrol_uninit:
             /* NOTE(review): presumably undoes __kvm_ucontrol_vcpu_init() - confirm */
3306         if (kvm_is_ucontrol(vcpu->kvm))
3307                 gmap_remove(vcpu->arch.gmap);
3308 out_free_sie_block:
             /*
              * NOTE(review): frees the page via the sie_block pointer; this looks
              * like it relies on sie_block living inside the allocated sie_page
              * (presumably as its first member) - confirm.
              */
3309         free_page((unsigned long)(vcpu->arch.sie_block));
3310         return rc;
3311 }
3312
     /*
      * A VCPU is reported as runnable when kvm_s390_vcpu_has_irq() returns
      * non-zero for it.
      * NOTE(review): the meaning of the second argument (0) is not visible
      * here - confirm against kvm_s390_vcpu_has_irq().
      */
3313 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3314 {
3315         return kvm_s390_vcpu_has_irq(vcpu, 0);
3316 }
3317
3318 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3319 {
3320         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3321 }
3322
     /*
      * Set PROG_BLOCK_SIE in prog20 of @vcpu and kick the VCPU out of SIE.
      * The bit stays set until kvm_s390_vcpu_unblock() is called.
      */
3323 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3324 {
             /* set the bit first, then kick and wait via exit_sie() */
3325         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3326         exit_sie(vcpu);
3327 }
3328
     /* Clear PROG_BLOCK_SIE in prog20 again, undoing kvm_s390_vcpu_block(). */
3329 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3330 {
3331         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3332 }
3333
     /*
      * Set PROG_REQUEST in prog20 of @vcpu and kick the VCPU out of SIE. The
      * bit is cleared again by kvm_s390_vcpu_request_handled().
      */
3334 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3335 {
3336         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3337         exit_sie(vcpu);
3338 }
3339
3340 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3341 {
3342         return atomic_read(&vcpu->arch.sie_block->prog20) &
3343                (PROG_BLOCK_SIE | PROG_REQUEST);
3344 }
3345
     /* Clear PROG_REQUEST in prog20; counterpart of kvm_s390_vcpu_request(). */
3346 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3347 {
3348         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3349 }
3350
3351 /*
3352  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3353  * If the CPU is not running (e.g. waiting as idle) the function will
3354  * return immediately. */
3355 void exit_sie(struct kvm_vcpu *vcpu)
3356 {
             /* request a stop interrupt and kick a possibly running vSIE */
3357         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3358         kvm_s390_vsie_kick(vcpu);
             /* busy-wait until PROG_IN_SIE is no longer set in prog0c */
3359         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3360                 cpu_relax();
3361 }
3362
3363 /* Kick a guest cpu out of SIE to process a request synchronously */
3364 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3365 {
             /* queue the request before kicking the VCPU out of SIE */
3366         kvm_make_request(req, vcpu);
3367         kvm_s390_vcpu_request(vcpu);
3368 }
3369
3370 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3371                               unsigned long end)
3372 {
3373         struct kvm *kvm = gmap->private;
3374         struct kvm_vcpu *vcpu;
3375         unsigned long prefix;
3376         int i;
3377
3378         if (gmap_is_shadow(gmap))
3379                 return;
3380         if (start >= 1UL << 31)
3381                 /* We are only interested in prefix pages */
3382                 return;
3383         kvm_for_each_vcpu(i, vcpu, kvm) {
3384                 /* match against both prefix pages */
3385                 prefix = kvm_s390_get_prefix(vcpu);
3386                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3387                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3388                                    start, end);
3389                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3390                 }
3391         }
3392 }
3393
3394 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3395 {
3396         /* do not poll with more than halt_poll_max_steal percent of steal time */
3397         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3398             halt_poll_max_steal) {
3399                 vcpu->stat.halt_no_poll_steal++;
3400                 return true;
3401         }
3402         return false;
3403 }
3404
     /*
      * Stub that exists only because common code references the symbol; it
      * triggers BUG() if it is ever reached.
      */
3405 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3406 {
3407         /* kvm common code refers to this, but never calls it */
3408         BUG();
3409         return 0;
3410 }
3411
3412 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3413                                            struct kvm_one_reg *reg)
3414 {
3415         int r = -EINVAL;
3416
3417         switch (reg->id) {
3418         case KVM_REG_S390_TODPR:
3419                 r = put_user(vcpu->arch.sie_block->todpr,
3420                              (u32 __user *)reg->addr);
3421                 break;
3422         case KVM_REG_S390_EPOCHDIFF:
3423                 r = put_user(vcpu->arch.sie_block->epoch,
3424                              (u64 __user *)reg->addr);
3425                 break;
3426         case KVM_REG_S390_CPU_TIMER:
3427                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3428                              (u64 __user *)reg->addr);
3429                 break;
3430         case KVM_REG_S390_CLOCK_COMP:
3431                 r = put_user(vcpu->arch.sie_block->ckc,
3432                              (u64 __user *)reg->addr);
3433                 break;
3434         case KVM_REG_S390_PFTOKEN:
3435                 r = put_user(vcpu->arch.pfault_token,
3436                              (u64 __user *)reg->addr);
3437                 break;
3438         case KVM_REG_S390_PFCOMPARE:
3439                 r = put_user(vcpu->arch.pfault_compare,
3440                              (u64 __user *)reg->addr);
3441                 break;
3442         case KVM_REG_S390_PFSELECT:
3443                 r = put_user(vcpu->arch.pfault_select,
3444                              (u64 __user *)reg->addr);
3445                 break;
3446         case KVM_REG_S390_PP:
3447                 r = put_user(vcpu->arch.sie_block->pp,
3448                              (u64 __user *)reg->addr);
3449                 break;
3450         case KVM_REG_S390_GBEA:
3451                 r = put_user(vcpu->arch.sie_block->gbea,
3452                              (u64 __user *)reg->addr);
3453                 break;
3454         default:
3455                 break;
3456         }
3457
3458         return r;
3459 }
3460
3461 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3462                                            struct kvm_one_reg *reg)
3463 {
3464         int r = -EINVAL;
3465         __u64 val;
3466
3467         switch (reg->id) {
3468         case KVM_REG_S390_TODPR:
3469                 r = get_user(vcpu->arch.sie_block->todpr,
3470                              (u32 __user *)reg->addr);
3471                 break;
3472         case KVM_REG_S390_EPOCHDIFF:
3473                 r = get_user(vcpu->arch.sie_block->epoch,
3474                              (u64 __user *)reg->addr);
3475                 break;
3476         case KVM_REG_S390_CPU_TIMER:
3477                 r = get_user(val, (u64 __user *)reg->addr);
3478                 if (!r)
3479                         kvm_s390_set_cpu_timer(vcpu, val);
3480                 break;
3481         case KVM_REG_S390_CLOCK_COMP:
3482                 r = get_user(vcpu->arch.sie_block->ckc,
3483                              (u64 __user *)reg->addr);
3484                 break;
3485         case KVM_REG_S390_PFTOKEN:
3486                 r = get_user(vcpu->arch.pfault_token,
3487                              (u64 __user *)reg->addr);
3488                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3489                         kvm_clear_async_pf_completion_queue(vcpu);
3490                 break;
3491         case KVM_REG_S390_PFCOMPARE:
3492                 r = get_user(vcpu->arch.pfault_compare,
3493                              (u64 __user *)reg->addr);
3494                 break;
3495         case KVM_REG_S390_PFSELECT:
3496                 r = get_user(vcpu->arch.pfault_select,
3497                              (u64 __user *)reg->addr);
3498                 break;
3499         case KVM_REG_S390_PP:
3500                 r = get_user(vcpu->arch.sie_block->pp,
3501                              (u64 __user *)reg->addr);
3502                 break;
3503         case KVM_REG_S390_GBEA:
3504                 r = get_user(vcpu->arch.sie_block->gbea,
3505                              (u64 __user *)reg->addr);
3506                 break;
3507         default:
3508                 break;
3509         }
3510
3511         return r;
3512 }
3513
     /*
      * Normal CPU reset: clears PSW_MASK_RI in the guest PSW, invalidates the
      * pfault token, zeroes the riccb sync reg, drops queued async pfault
      * completions, stops the VCPU unless user space controls the CPU state,
      * and clears all local interrupts.
      */
3514 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3515 {
3516         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3517         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3518         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3519
3520         kvm_clear_async_pf_completion_queue(vcpu);
             /* only stop the VCPU here if user space does not manage CPU states */
3521         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3522                 kvm_s390_vcpu_stop(vcpu);
3523         kvm_s390_clear_local_irqs(vcpu);
3524 }
3525
3526 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3527 {
3528         /* Initial reset is a superset of the normal reset */
3529         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3530
3531         /*
3532          * This equals initial cpu reset in pop, but we don't switch to ESA.
3533          * We do not only reset the internal data, but also ...
3534          */
3535         vcpu->arch.sie_block->gpsw.mask = 0;
3536         vcpu->arch.sie_block->gpsw.addr = 0;
3537         kvm_s390_set_prefix(vcpu, 0);
3538         kvm_s390_set_cpu_timer(vcpu, 0);
3539         vcpu->arch.sie_block->ckc = 0;
3540         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3541         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3542         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3543
3544         /* ... the data in sync regs */
3545         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3546         vcpu->run->s.regs.ckc = 0;
3547         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3548         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3549         vcpu->run->psw_addr = 0;
3550         vcpu->run->psw_mask = 0;
3551         vcpu->run->s.regs.todpr = 0;
3552         vcpu->run->s.regs.cputm = 0;
3553         vcpu->run->s.regs.ckc = 0;
3554         vcpu->run->s.regs.pp = 0;
3555         vcpu->run->s.regs.gbea = 1;
3556         vcpu->run->s.regs.fpc = 0;
3557         /*
3558          * Do not reset these registers in the protected case, as some of
3559          * them are overlayed and they are not accessible in this case
3560          * anyway.
3561          */
3562         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3563                 vcpu->arch.sie_block->gbea = 1;
3564                 vcpu->arch.sie_block->pp = 0;
3565                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3566                 vcpu->arch.sie_block->todpr = 0;
3567                 vcpu->arch.sie_block->cpnc = 0;
3568         }
3569 }
3570
3571 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3572 {
3573         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3574
3575         /* Clear reset is a superset of the initial reset */
3576         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3577
3578         memset(&regs->gprs, 0, sizeof(regs->gprs));
3579         memset(&regs->vrs, 0, sizeof(regs->vrs));
3580         memset(&regs->acrs, 0, sizeof(regs->acrs));
3581         memset(&regs->gscb, 0, sizeof(regs->gscb));
3582
3583         regs->etoken = 0;
3584         regs->etoken_extension = 0;
3585         regs->diag318 = 0;
3586 }
3587
3588 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3589 {
3590         vcpu_load(vcpu);
3591         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3592         vcpu_put(vcpu);
3593         return 0;
3594 }
3595
3596 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3597 {
3598         vcpu_load(vcpu);
3599         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3600         vcpu_put(vcpu);
3601         return 0;
3602 }
3603
3604 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3605                                   struct kvm_sregs *sregs)
3606 {
3607         vcpu_load(vcpu);
3608
3609         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3610         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3611
3612         vcpu_put(vcpu);
3613         return 0;
3614 }
3615
3616 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3617                                   struct kvm_sregs *sregs)
3618 {
3619         vcpu_load(vcpu);
3620
3621         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3622         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3623
3624         vcpu_put(vcpu);
3625         return 0;
3626 }
3627
3628 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3629 {
3630         int ret = 0;
3631
3632         vcpu_load(vcpu);
3633
3634         if (test_fp_ctl(fpu->fpc)) {
3635                 ret = -EINVAL;
3636                 goto out;
3637         }
3638         vcpu->run->s.regs.fpc = fpu->fpc;
3639         if (MACHINE_HAS_VX)
3640                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3641                                  (freg_t *) fpu->fprs);
3642         else
3643                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3644
3645 out:
3646         vcpu_put(vcpu);
3647         return ret;
3648 }
3649
3650 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3651 {
3652         vcpu_load(vcpu);
3653
3654         /* make sure we have the latest values */
3655         save_fpu_regs();
3656         if (MACHINE_HAS_VX)
3657                 convert_vx_to_fp((freg_t *) fpu->fprs,
3658                                  (__vector128 *) vcpu->run->s.regs.vrs);
3659         else
3660                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3661         fpu->fpc = vcpu->run->s.regs.fpc;
3662
3663         vcpu_put(vcpu);
3664         return 0;
3665 }
3666
3667 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3668 {
3669         int rc = 0;
3670
3671         if (!is_vcpu_stopped(vcpu))
3672                 rc = -EBUSY;
3673         else {
3674                 vcpu->run->psw_mask = psw.mask;
3675                 vcpu->run->psw_addr = psw.addr;
3676         }
3677         return rc;
3678 }
3679
     /* Address translation is not implemented: always returns -EINVAL. */
3680 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3681                                   struct kvm_translation *tr)
3682 {
3683         return -EINVAL; /* not implemented yet */
3684 }
3685
3686 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3687                               KVM_GUESTDBG_USE_HW_BP | \
3688                               KVM_GUESTDBG_ENABLE)
3689
3690 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3691                                         struct kvm_guest_debug *dbg)
3692 {
3693         int rc = 0;
3694
3695         vcpu_load(vcpu);
3696
3697         vcpu->guest_debug = 0;
3698         kvm_s390_clear_bp_data(vcpu);
3699
3700         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3701                 rc = -EINVAL;
3702                 goto out;
3703         }
3704         if (!sclp.has_gpere) {
3705                 rc = -EINVAL;
3706                 goto out;
3707         }
3708
3709         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3710                 vcpu->guest_debug = dbg->control;
3711                 /* enforce guest PER */
3712                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3713
3714                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3715                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3716         } else {
3717                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3718                 vcpu->arch.guestdbg.last_bp = 0;
3719         }
3720
3721         if (rc) {
3722                 vcpu->guest_debug = 0;
3723                 kvm_s390_clear_bp_data(vcpu);
3724                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3725         }
3726
3727 out:
3728         vcpu_put(vcpu);
3729         return rc;
3730 }
3731
3732 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3733                                     struct kvm_mp_state *mp_state)
3734 {
3735         int ret;
3736
3737         vcpu_load(vcpu);
3738
3739         /* CHECK_STOP and LOAD are not supported yet */
3740         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3741                                       KVM_MP_STATE_OPERATING;
3742
3743         vcpu_put(vcpu);
3744         return ret;
3745 }
3746
3747 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3748                                     struct kvm_mp_state *mp_state)
3749 {
3750         int rc = 0;
3751
3752         vcpu_load(vcpu);
3753
3754         /* user space knows about this interface - let it control the state */
3755         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3756
3757         switch (mp_state->mp_state) {
3758         case KVM_MP_STATE_STOPPED:
3759                 rc = kvm_s390_vcpu_stop(vcpu);
3760                 break;
3761         case KVM_MP_STATE_OPERATING:
3762                 rc = kvm_s390_vcpu_start(vcpu);
3763                 break;
3764         case KVM_MP_STATE_LOAD:
3765                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3766                         rc = -ENXIO;
3767                         break;
3768                 }
3769                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3770                 break;
3771         case KVM_MP_STATE_CHECK_STOP:
3772                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3773         default:
3774                 rc = -ENXIO;
3775         }
3776
3777         vcpu_put(vcpu);
3778         return rc;
3779 }
3780
     /* Return true if CPUSTAT_IBS is currently set in the cpuflags of @vcpu. */
3781 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3782 {
3783         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3784 }
3785
     /*
      * Process the pending requests of @vcpu.
      *
      * Every handled request jumps back to the retry label, which first clears
      * PROG_REQUEST via kvm_s390_vcpu_request_handled() and then re-checks
      * whether anything is still pending. The function therefore only returns
      * once no request it knows about is left.
      *
      * Return: 0 on success, or the error of gmap_mprotect_notify(); in that
      * case KVM_REQ_MMU_RELOAD is queued again before returning.
      */
3786 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3787 {
3788 retry:
3789         kvm_s390_vcpu_request_handled(vcpu);
3790         if (!kvm_request_pending(vcpu))
3791                 return 0;
3792         /*
3793          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3794          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3795          * This ensures that the ipte instruction for this request has
3796          * already finished. We might race against a second unmapper that
3797          * wants to set the blocking bit. Lets just retry the request loop.
3798          */
3799         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3800                 int rc;
3801                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3802                                           kvm_s390_get_prefix(vcpu),
3803                                           PAGE_SIZE * 2, PROT_WRITE);
3804                 if (rc) {
                             /* keep the request pending so it is retried later */
3805                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3806                         return rc;
3807                 }
3808                 goto retry;
3809         }
3810
3811         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
                     /*
                      * NOTE(review): 0xffff presumably invalidates the recorded
                      * host cpu so that the guest TLB is flushed on the next SIE
                      * entry - confirm.
                      */
3812                 vcpu->arch.sie_block->ihcpu = 0xffff;
3813                 goto retry;
3814         }
3815
3816         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3817                 if (!ibs_enabled(vcpu)) {
3818                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3819                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3820                 }
3821                 goto retry;
3822         }
3823
3824         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3825                 if (ibs_enabled(vcpu)) {
3826                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3827                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3828                 }
3829                 goto retry;
3830         }
3831
3832         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3833                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3834                 goto retry;
3835         }
3836
3837         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3838                 /*
3839                  * Disable CMM virtualization; we will emulate the ESSA
3840                  * instruction manually, in order to provide additional
3841                  * functionalities needed for live migration.
3842                  */
3843                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3844                 goto retry;
3845         }
3846
3847         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3848                 /*
3849                  * Re-enable CMM virtualization if CMMA is available and
3850                  * CMM has been used.
3851                  */
3852                 if ((vcpu->kvm->arch.use_cmma) &&
3853                     (vcpu->kvm->mm->context.uses_cmm))
3854                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3855                 goto retry;
3856         }
3857
3858         /* nothing to do, just clear the request */
3859         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3860         /* we left the vsie handler, nothing to do, just clear the request */
3861         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3862
3863         return 0;
3864 }
3865
3866 void kvm_s390_set_tod_clock(struct kvm *kvm,
3867                             const struct kvm_s390_vm_tod_clock *gtod)
3868 {
3869         struct kvm_vcpu *vcpu;
3870         struct kvm_s390_tod_clock_ext htod;
3871         int i;
3872
3873         mutex_lock(&kvm->lock);
3874         preempt_disable();
3875
3876         get_tod_clock_ext((char *)&htod);
3877
3878         kvm->arch.epoch = gtod->tod - htod.tod;
3879         kvm->arch.epdx = 0;
3880         if (test_kvm_facility(kvm, 139)) {
3881                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3882                 if (kvm->arch.epoch > gtod->tod)
3883                         kvm->arch.epdx -= 1;
3884         }
3885
3886         kvm_s390_vcpu_block_all(kvm);
3887         kvm_for_each_vcpu(i, vcpu, kvm) {
3888                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3889                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3890         }
3891
3892         kvm_s390_vcpu_unblock_all(kvm);
3893         preempt_enable();
3894         mutex_unlock(&kvm->lock);
3895 }
3896
3897 /**
3898  * kvm_arch_fault_in_page - fault-in guest page if necessary
3899  * @vcpu: The corresponding virtual cpu
3900  * @gpa: Guest physical address
3901  * @writable: Whether the page should be writable or not
3902  *
3903  * Make sure that a guest page has been faulted-in on the host.
3904  *
3905  * Return: Zero on success, negative error code otherwise.
3906  */
3907 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3908 {
3909         return gmap_fault(vcpu->arch.gmap, gpa,
3910                           writable ? FAULT_FLAG_WRITE : 0);
3911 }
3912
3913 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3914                                       unsigned long token)
3915 {
3916         struct kvm_s390_interrupt inti;
3917         struct kvm_s390_irq irq;
3918
3919         if (start_token) {
3920                 irq.u.ext.ext_params2 = token;
3921                 irq.type = KVM_S390_INT_PFAULT_INIT;
3922                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3923         } else {
3924                 inti.type = KVM_S390_INT_PFAULT_DONE;
3925                 inti.parm64 = token;
3926                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3927         }
3928 }
3929
3930 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3931                                      struct kvm_async_pf *work)
3932 {
3933         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3934         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3935
3936         return true;
3937 }
3938
3939 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3940                                  struct kvm_async_pf *work)
3941 {
3942         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3943         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3944 }
3945
/*
 * Async-pf hook, intentionally empty: on s390 the page is always injected
 * directly, so nothing is left to do here.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
}
3951
3952 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3953 {
3954         /*
3955          * s390 will always inject the page directly,
3956          * but we still want check_async_completion to cleanup
3957          */
3958         return true;
3959 }
3960
3961 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3962 {
3963         hva_t hva;
3964         struct kvm_arch_async_pf arch;
3965
3966         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3967                 return false;
3968         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3969             vcpu->arch.pfault_compare)
3970                 return false;
3971         if (psw_extint_disabled(vcpu))
3972                 return false;
3973         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3974                 return false;
3975         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3976                 return false;
3977         if (!vcpu->arch.gmap->pfault_enabled)
3978                 return false;
3979
3980         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3981         hva += current->thread.gmap_addr & ~PAGE_MASK;
3982         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3983                 return false;
3984
3985         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3986 }
3987
3988 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3989 {
3990         int rc, cpuflags;
3991
3992         /*
3993          * On s390 notifications for arriving pages will be delivered directly
3994          * to the guest but the house keeping for completed pfaults is
3995          * handled outside the worker.
3996          */
3997         kvm_check_async_pf_completion(vcpu);
3998
3999         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4000         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4001
4002         if (need_resched())
4003                 schedule();
4004
4005         if (!kvm_is_ucontrol(vcpu->kvm)) {
4006                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4007                 if (rc)
4008                         return rc;
4009         }
4010
4011         rc = kvm_s390_handle_requests(vcpu);
4012         if (rc)
4013                 return rc;
4014
4015         if (guestdbg_enabled(vcpu)) {
4016                 kvm_s390_backup_guest_per_regs(vcpu);
4017                 kvm_s390_patch_guest_per_regs(vcpu);
4018         }
4019
4020         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4021
4022         vcpu->arch.sie_block->icptcode = 0;
4023         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4024         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4025         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4026
4027         return 0;
4028 }
4029
4030 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4031 {
4032         struct kvm_s390_pgm_info pgm_info = {
4033                 .code = PGM_ADDRESSING,
4034         };
4035         u8 opcode, ilen;
4036         int rc;
4037
4038         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4039         trace_kvm_s390_sie_fault(vcpu);
4040
4041         /*
4042          * We want to inject an addressing exception, which is defined as a
4043          * suppressing or terminating exception. However, since we came here
4044          * by a DAT access exception, the PSW still points to the faulting
4045          * instruction since DAT exceptions are nullifying. So we've got
4046          * to look up the current opcode to get the length of the instruction
4047          * to be able to forward the PSW.
4048          */
4049         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4050         ilen = insn_length(opcode);
4051         if (rc < 0) {
4052                 return rc;
4053         } else if (rc) {
4054                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4055                  * Forward by arbitrary ilc, injection will take care of
4056                  * nullification if necessary.
4057                  */
4058                 pgm_info = vcpu->arch.pgm;
4059                 ilen = 4;
4060         }
4061         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4062         kvm_s390_forward_psw(vcpu, ilen);
4063         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4064 }
4065
4066 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4067 {
4068         struct mcck_volatile_info *mcck_info;
4069         struct sie_page *sie_page;
4070
4071         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4072                    vcpu->arch.sie_block->icptcode);
4073         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4074
4075         if (guestdbg_enabled(vcpu))
4076                 kvm_s390_restore_guest_per_regs(vcpu);
4077
4078         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4079         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4080
4081         if (exit_reason == -EINTR) {
4082                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4083                 sie_page = container_of(vcpu->arch.sie_block,
4084                                         struct sie_page, sie_block);
4085                 mcck_info = &sie_page->mcck_info;
4086                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4087                 return 0;
4088         }
4089
4090         if (vcpu->arch.sie_block->icptcode > 0) {
4091                 int rc = kvm_handle_sie_intercept(vcpu);
4092
4093                 if (rc != -EOPNOTSUPP)
4094                         return rc;
4095                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4096                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4097                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4098                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4099                 return -EREMOTE;
4100         } else if (exit_reason != -EFAULT) {
4101                 vcpu->stat.exit_null++;
4102                 return 0;
4103         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4104                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4105                 vcpu->run->s390_ucontrol.trans_exc_code =
4106                                                 current->thread.gmap_addr;
4107                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4108                 return -EREMOTE;
4109         } else if (current->thread.gmap_pfault) {
4110                 trace_kvm_s390_major_guest_pfault(vcpu);
4111                 current->thread.gmap_pfault = 0;
4112                 if (kvm_arch_setup_async_pf(vcpu))
4113                         return 0;
4114                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4115         }
4116         return vcpu_post_run_fault_in_sie(vcpu);
4117 }
4118
4119 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4120 static int __vcpu_run(struct kvm_vcpu *vcpu)
4121 {
4122         int rc, exit_reason;
4123         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4124
4125         /*
4126          * We try to hold kvm->srcu during most of vcpu_run (except when run-
4127          * ning the guest), so that memslots (and other stuff) are protected
4128          */
4129         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4130
4131         do {
4132                 rc = vcpu_pre_run(vcpu);
4133                 if (rc)
4134                         break;
4135
4136                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4137                 /*
4138                  * As PF_VCPU will be used in fault handler, between
4139                  * guest_enter and guest_exit should be no uaccess.
4140                  */
4141                 local_irq_disable();
4142                 guest_enter_irqoff();
4143                 __disable_cpu_timer_accounting(vcpu);
4144                 local_irq_enable();
4145                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4146                         memcpy(sie_page->pv_grregs,
4147                                vcpu->run->s.regs.gprs,
4148                                sizeof(sie_page->pv_grregs));
4149                 }
4150                 exit_reason = sie64a(vcpu->arch.sie_block,
4151                                      vcpu->run->s.regs.gprs);
4152                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4153                         memcpy(vcpu->run->s.regs.gprs,
4154                                sie_page->pv_grregs,
4155                                sizeof(sie_page->pv_grregs));
4156                         /*
4157                          * We're not allowed to inject interrupts on intercepts
4158                          * that leave the guest state in an "in-between" state
4159                          * where the next SIE entry will do a continuation.
4160                          * Fence interrupts in our "internal" PSW.
4161                          */
4162                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4163                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4164                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4165                         }
4166                 }
4167                 local_irq_disable();
4168                 __enable_cpu_timer_accounting(vcpu);
4169                 guest_exit_irqoff();
4170                 local_irq_enable();
4171                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4172
4173                 rc = vcpu_post_run(vcpu, exit_reason);
4174         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4175
4176         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4177         return rc;
4178 }
4179
4180 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4181 {
4182         struct kvm_run *kvm_run = vcpu->run;
4183         struct runtime_instr_cb *riccb;
4184         struct gs_cb *gscb;
4185
4186         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4187         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4188         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4189         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4190         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4191                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4192                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4193                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4194         }
4195         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4196                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4197                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4198                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4199                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4200                         kvm_clear_async_pf_completion_queue(vcpu);
4201         }
4202         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4203                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4204                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4205         }
4206         /*
4207          * If userspace sets the riccb (e.g. after migration) to a valid state,
4208          * we should enable RI here instead of doing the lazy enablement.
4209          */
4210         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4211             test_kvm_facility(vcpu->kvm, 64) &&
4212             riccb->v &&
4213             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4214                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4215                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4216         }
4217         /*
4218          * If userspace sets the gscb (e.g. after migration) to non-zero,
4219          * we should enable GS here instead of doing the lazy enablement.
4220          */
4221         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4222             test_kvm_facility(vcpu->kvm, 133) &&
4223             gscb->gssm &&
4224             !vcpu->arch.gs_enabled) {
4225                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4226                 vcpu->arch.sie_block->ecb |= ECB_GS;
4227                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4228                 vcpu->arch.gs_enabled = 1;
4229         }
4230         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4231             test_kvm_facility(vcpu->kvm, 82)) {
4232                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4233                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4234         }
4235         if (MACHINE_HAS_GS) {
4236                 preempt_disable();
4237                 __ctl_set_bit(2, 4);
4238                 if (current->thread.gs_cb) {
4239                         vcpu->arch.host_gscb = current->thread.gs_cb;
4240                         save_gs_cb(vcpu->arch.host_gscb);
4241                 }
4242                 if (vcpu->arch.gs_enabled) {
4243                         current->thread.gs_cb = (struct gs_cb *)
4244                                                 &vcpu->run->s.regs.gscb;
4245                         restore_gs_cb(current->thread.gs_cb);
4246                 }
4247                 preempt_enable();
4248         }
4249         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4250 }
4251
4252 static void sync_regs(struct kvm_vcpu *vcpu)
4253 {
4254         struct kvm_run *kvm_run = vcpu->run;
4255
4256         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4257                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4258         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4259                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4260                 /* some control register changes require a tlb flush */
4261                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4262         }
4263         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4264                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4265                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4266         }
4267         save_access_regs(vcpu->arch.host_acrs);
4268         restore_access_regs(vcpu->run->s.regs.acrs);
4269         /* save host (userspace) fprs/vrs */
4270         save_fpu_regs();
4271         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4272         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4273         if (MACHINE_HAS_VX)
4274                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4275         else
4276                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4277         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4278         if (test_fp_ctl(current->thread.fpu.fpc))
4279                 /* User space provided an invalid FPC, let's clear it */
4280                 current->thread.fpu.fpc = 0;
4281
4282         /* Sync fmt2 only data */
4283         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4284                 sync_regs_fmt2(vcpu);
4285         } else {
4286                 /*
4287                  * In several places we have to modify our internal view to
4288                  * not do things that are disallowed by the ultravisor. For
4289                  * example we must not inject interrupts after specific exits
4290                  * (e.g. 112 prefix page not secure). We do this by turning
4291                  * off the machine check, external and I/O interrupt bits
4292                  * of our PSW copy. To avoid getting validity intercepts, we
4293                  * do only accept the condition code from userspace.
4294                  */
4295                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4296                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4297                                                    PSW_MASK_CC;
4298         }
4299
4300         kvm_run->kvm_dirty_regs = 0;
4301 }
4302
4303 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4304 {
4305         struct kvm_run *kvm_run = vcpu->run;
4306
4307         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4308         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4309         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4310         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4311         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4312         if (MACHINE_HAS_GS) {
4313                 __ctl_set_bit(2, 4);
4314                 if (vcpu->arch.gs_enabled)
4315                         save_gs_cb(current->thread.gs_cb);
4316                 preempt_disable();
4317                 current->thread.gs_cb = vcpu->arch.host_gscb;
4318                 restore_gs_cb(vcpu->arch.host_gscb);
4319                 preempt_enable();
4320                 if (!vcpu->arch.host_gscb)
4321                         __ctl_clear_bit(2, 4);
4322                 vcpu->arch.host_gscb = NULL;
4323         }
4324         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4325 }
4326
4327 static void store_regs(struct kvm_vcpu *vcpu)
4328 {
4329         struct kvm_run *kvm_run = vcpu->run;
4330
4331         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4332         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4333         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4334         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4335         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4336         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4337         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4338         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4339         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4340         save_access_regs(vcpu->run->s.regs.acrs);
4341         restore_access_regs(vcpu->arch.host_acrs);
4342         /* Save guest register state */
4343         save_fpu_regs();
4344         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4345         /* Restore will be done lazily at return */
4346         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4347         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4348         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4349                 store_regs_fmt2(vcpu);
4350 }
4351
4352 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4353 {
4354         struct kvm_run *kvm_run = vcpu->run;
4355         int rc;
4356
4357         if (kvm_run->immediate_exit)
4358                 return -EINTR;
4359
4360         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4361             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4362                 return -EINVAL;
4363
4364         vcpu_load(vcpu);
4365
4366         if (guestdbg_exit_pending(vcpu)) {
4367                 kvm_s390_prepare_debug_exit(vcpu);
4368                 rc = 0;
4369                 goto out;
4370         }
4371
4372         kvm_sigset_activate(vcpu);
4373
4374         /*
4375          * no need to check the return value of vcpu_start as it can only have
4376          * an error for protvirt, but protvirt means user cpu state
4377          */
4378         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4379                 kvm_s390_vcpu_start(vcpu);
4380         } else if (is_vcpu_stopped(vcpu)) {
4381                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4382                                    vcpu->vcpu_id);
4383                 rc = -EINVAL;
4384                 goto out;
4385         }
4386
4387         sync_regs(vcpu);
4388         enable_cpu_timer_accounting(vcpu);
4389
4390         might_fault();
4391         rc = __vcpu_run(vcpu);
4392
4393         if (signal_pending(current) && !rc) {
4394                 kvm_run->exit_reason = KVM_EXIT_INTR;
4395                 rc = -EINTR;
4396         }
4397
4398         if (guestdbg_exit_pending(vcpu) && !rc)  {
4399                 kvm_s390_prepare_debug_exit(vcpu);
4400                 rc = 0;
4401         }
4402
4403         if (rc == -EREMOTE) {
4404                 /* userspace support is needed, kvm_run has been prepared */
4405                 rc = 0;
4406         }
4407
4408         disable_cpu_timer_accounting(vcpu);
4409         store_regs(vcpu);
4410
4411         kvm_sigset_deactivate(vcpu);
4412
4413         vcpu->stat.exit_userspace++;
4414 out:
4415         vcpu_put(vcpu);
4416         return rc;
4417 }
4418
4419 /*
4420  * store status at address
4421  * we use have two special cases:
4422  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4423  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4424  */
4425 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4426 {
4427         unsigned char archmode = 1;
4428         freg_t fprs[NUM_FPRS];
4429         unsigned int px;
4430         u64 clkcomp, cputm;
4431         int rc;
4432
4433         px = kvm_s390_get_prefix(vcpu);
4434         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4435                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4436                         return -EFAULT;
4437                 gpa = 0;
4438         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4439                 if (write_guest_real(vcpu, 163, &archmode, 1))
4440                         return -EFAULT;
4441                 gpa = px;
4442         } else
4443                 gpa -= __LC_FPREGS_SAVE_AREA;
4444
4445         /* manually convert vector registers if necessary */
4446         if (MACHINE_HAS_VX) {
4447                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4448                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4449                                      fprs, 128);
4450         } else {
4451                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4452                                      vcpu->run->s.regs.fprs, 128);
4453         }
4454         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4455                               vcpu->run->s.regs.gprs, 128);
4456         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4457                               &vcpu->arch.sie_block->gpsw, 16);
4458         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4459                               &px, 4);
4460         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4461                               &vcpu->run->s.regs.fpc, 4);
4462         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4463                               &vcpu->arch.sie_block->todpr, 4);
4464         cputm = kvm_s390_get_cpu_timer(vcpu);
4465         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4466                               &cputm, 8);
4467         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4468         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4469                               &clkcomp, 8);
4470         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4471                               &vcpu->run->s.regs.acrs, 64);
4472         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4473                               &vcpu->arch.sie_block->gcr, 128);
4474         return rc ? -EFAULT : 0;
4475 }
4476
4477 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4478 {
4479         /*
4480          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4481          * switch in the run ioctl. Let's update our copies before we save
4482          * it into the save area
4483          */
4484         save_fpu_regs();
4485         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4486         save_access_regs(vcpu->run->s.regs.acrs);
4487
4488         return kvm_s390_store_status_unloaded(vcpu, addr);
4489 }
4490
4491 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4492 {
4493         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4494         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4495 }
4496
4497 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4498 {
4499         unsigned int i;
4500         struct kvm_vcpu *vcpu;
4501
4502         kvm_for_each_vcpu(i, vcpu, kvm) {
4503                 __disable_ibs_on_vcpu(vcpu);
4504         }
4505 }
4506
4507 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4508 {
4509         if (!sclp.has_ibs)
4510                 return;
4511         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4512         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4513 }
4514
4515 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4516 {
4517         int i, online_vcpus, r = 0, started_vcpus = 0;
4518
4519         if (!is_vcpu_stopped(vcpu))
4520                 return 0;
4521
4522         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4523         /* Only one cpu at a time may enter/leave the STOPPED state. */
4524         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4525         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4526
4527         /* Let's tell the UV that we want to change into the operating state */
4528         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4529                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4530                 if (r) {
4531                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4532                         return r;
4533                 }
4534         }
4535
4536         for (i = 0; i < online_vcpus; i++) {
4537                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4538                         started_vcpus++;
4539         }
4540
4541         if (started_vcpus == 0) {
4542                 /* we're the only active VCPU -> speed it up */
4543                 __enable_ibs_on_vcpu(vcpu);
4544         } else if (started_vcpus == 1) {
4545                 /*
4546                  * As we are starting a second VCPU, we have to disable
4547                  * the IBS facility on all VCPUs to remove potentially
4548                  * oustanding ENABLE requests.
4549                  */
4550                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4551         }
4552
4553         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4554         /*
4555          * The real PSW might have changed due to a RESTART interpreted by the
4556          * ultravisor. We block all interrupts and let the next sie exit
4557          * refresh our view.
4558          */
4559         if (kvm_s390_pv_cpu_is_protected(vcpu))
4560                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4561         /*
4562          * Another VCPU might have used IBS while we were offline.
4563          * Let's play safe and flush the VCPU at startup.
4564          */
4565         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4566         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4567         return 0;
4568 }
4569
4570 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4571 {
4572         int i, online_vcpus, r = 0, started_vcpus = 0;
4573         struct kvm_vcpu *started_vcpu = NULL;
4574
4575         if (is_vcpu_stopped(vcpu))
4576                 return 0;
4577
4578         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4579         /* Only one cpu at a time may enter/leave the STOPPED state. */
4580         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4581         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4582
4583         /* Let's tell the UV that we want to change into the stopped state */
4584         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4585                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4586                 if (r) {
4587                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4588                         return r;
4589                 }
4590         }
4591
4592         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4593         kvm_s390_clear_stop_irq(vcpu);
4594
4595         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4596         __disable_ibs_on_vcpu(vcpu);
4597
4598         for (i = 0; i < online_vcpus; i++) {
4599                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4600                         started_vcpus++;
4601                         started_vcpu = vcpu->kvm->vcpus[i];
4602                 }
4603         }
4604
4605         if (started_vcpus == 1) {
4606                 /*
4607                  * As we only have one VCPU left, we want to enable the
4608                  * IBS facility for that VCPU to speed it up.
4609                  */
4610                 __enable_ibs_on_vcpu(started_vcpu);
4611         }
4612
4613         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4614         return 0;
4615 }
4616
4617 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4618                                      struct kvm_enable_cap *cap)
4619 {
4620         int r;
4621
4622         if (cap->flags)
4623                 return -EINVAL;
4624
4625         switch (cap->cap) {
4626         case KVM_CAP_S390_CSS_SUPPORT:
4627                 if (!vcpu->kvm->arch.css_support) {
4628                         vcpu->kvm->arch.css_support = 1;
4629                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4630                         trace_kvm_s390_enable_css(vcpu->kvm);
4631                 }
4632                 r = 0;
4633                 break;
4634         default:
4635                 r = -EINVAL;
4636                 break;
4637         }
4638         return r;
4639 }
4640
4641 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4642                                    struct kvm_s390_mem_op *mop)
4643 {
4644         void __user *uaddr = (void __user *)mop->buf;
4645         int r = 0;
4646
4647         if (mop->flags || !mop->size)
4648                 return -EINVAL;
4649         if (mop->size + mop->sida_offset < mop->size)
4650                 return -EINVAL;
4651         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4652                 return -E2BIG;
4653
4654         switch (mop->op) {
4655         case KVM_S390_MEMOP_SIDA_READ:
4656                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4657                                  mop->sida_offset), mop->size))
4658                         r = -EFAULT;
4659
4660                 break;
4661         case KVM_S390_MEMOP_SIDA_WRITE:
4662                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4663                                    mop->sida_offset), uaddr, mop->size))
4664                         r = -EFAULT;
4665                 break;
4666         }
4667         return r;
4668 }
4669 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4670                                   struct kvm_s390_mem_op *mop)
4671 {
4672         void __user *uaddr = (void __user *)mop->buf;
4673         void *tmpbuf = NULL;
4674         int r = 0;
4675         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4676                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4677
4678         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4679                 return -EINVAL;
4680
4681         if (mop->size > MEM_OP_MAX_SIZE)
4682                 return -E2BIG;
4683
4684         if (kvm_s390_pv_cpu_is_protected(vcpu))
4685                 return -EINVAL;
4686
4687         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4688                 tmpbuf = vmalloc(mop->size);
4689                 if (!tmpbuf)
4690                         return -ENOMEM;
4691         }
4692
4693         switch (mop->op) {
4694         case KVM_S390_MEMOP_LOGICAL_READ:
4695                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4696                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4697                                             mop->size, GACC_FETCH);
4698                         break;
4699                 }
4700                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4701                 if (r == 0) {
4702                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4703                                 r = -EFAULT;
4704                 }
4705                 break;
4706         case KVM_S390_MEMOP_LOGICAL_WRITE:
4707                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4708                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4709                                             mop->size, GACC_STORE);
4710                         break;
4711                 }
4712                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4713                         r = -EFAULT;
4714                         break;
4715                 }
4716                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4717                 break;
4718         }
4719
4720         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4721                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4722
4723         vfree(tmpbuf);
4724         return r;
4725 }
4726
4727 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4728                                       struct kvm_s390_mem_op *mop)
4729 {
4730         int r, srcu_idx;
4731
4732         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4733
4734         switch (mop->op) {
4735         case KVM_S390_MEMOP_LOGICAL_READ:
4736         case KVM_S390_MEMOP_LOGICAL_WRITE:
4737                 r = kvm_s390_guest_mem_op(vcpu, mop);
4738                 break;
4739         case KVM_S390_MEMOP_SIDA_READ:
4740         case KVM_S390_MEMOP_SIDA_WRITE:
4741                 /* we are locked against sida going away by the vcpu->mutex */
4742                 r = kvm_s390_guest_sida_op(vcpu, mop);
4743                 break;
4744         default:
4745                 r = -EINVAL;
4746         }
4747
4748         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4749         return r;
4750 }
4751
4752 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4753                                unsigned int ioctl, unsigned long arg)
4754 {
4755         struct kvm_vcpu *vcpu = filp->private_data;
4756         void __user *argp = (void __user *)arg;
4757
4758         switch (ioctl) {
4759         case KVM_S390_IRQ: {
4760                 struct kvm_s390_irq s390irq;
4761
4762                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4763                         return -EFAULT;
4764                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4765         }
4766         case KVM_S390_INTERRUPT: {
4767                 struct kvm_s390_interrupt s390int;
4768                 struct kvm_s390_irq s390irq = {};
4769
4770                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4771                         return -EFAULT;
4772                 if (s390int_to_s390irq(&s390int, &s390irq))
4773                         return -EINVAL;
4774                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4775         }
4776         }
4777         return -ENOIOCTLCMD;
4778 }
4779
4780 long kvm_arch_vcpu_ioctl(struct file *filp,
4781                          unsigned int ioctl, unsigned long arg)
4782 {
4783         struct kvm_vcpu *vcpu = filp->private_data;
4784         void __user *argp = (void __user *)arg;
4785         int idx;
4786         long r;
4787         u16 rc, rrc;
4788
4789         vcpu_load(vcpu);
4790
4791         switch (ioctl) {
4792         case KVM_S390_STORE_STATUS:
4793                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4794                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4795                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4796                 break;
4797         case KVM_S390_SET_INITIAL_PSW: {
4798                 psw_t psw;
4799
4800                 r = -EFAULT;
4801                 if (copy_from_user(&psw, argp, sizeof(psw)))
4802                         break;
4803                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4804                 break;
4805         }
4806         case KVM_S390_CLEAR_RESET:
4807                 r = 0;
4808                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4809                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4810                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4811                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4812                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4813                                    rc, rrc);
4814                 }
4815                 break;
4816         case KVM_S390_INITIAL_RESET:
4817                 r = 0;
4818                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4819                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4820                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4821                                           UVC_CMD_CPU_RESET_INITIAL,
4822                                           &rc, &rrc);
4823                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4824                                    rc, rrc);
4825                 }
4826                 break;
4827         case KVM_S390_NORMAL_RESET:
4828                 r = 0;
4829                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4830                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4831                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4832                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4833                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4834                                    rc, rrc);
4835                 }
4836                 break;
4837         case KVM_SET_ONE_REG:
4838         case KVM_GET_ONE_REG: {
4839                 struct kvm_one_reg reg;
4840                 r = -EINVAL;
4841                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4842                         break;
4843                 r = -EFAULT;
4844                 if (copy_from_user(&reg, argp, sizeof(reg)))
4845                         break;
4846                 if (ioctl == KVM_SET_ONE_REG)
4847                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4848                 else
4849                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4850                 break;
4851         }
4852 #ifdef CONFIG_KVM_S390_UCONTROL
4853         case KVM_S390_UCAS_MAP: {
4854                 struct kvm_s390_ucas_mapping ucasmap;
4855
4856                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4857                         r = -EFAULT;
4858                         break;
4859                 }
4860
4861                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4862                         r = -EINVAL;
4863                         break;
4864                 }
4865
4866                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4867                                      ucasmap.vcpu_addr, ucasmap.length);
4868                 break;
4869         }
4870         case KVM_S390_UCAS_UNMAP: {
4871                 struct kvm_s390_ucas_mapping ucasmap;
4872
4873                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4874                         r = -EFAULT;
4875                         break;
4876                 }
4877
4878                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4879                         r = -EINVAL;
4880                         break;
4881                 }
4882
4883                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4884                         ucasmap.length);
4885                 break;
4886         }
4887 #endif
4888         case KVM_S390_VCPU_FAULT: {
4889                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4890                 break;
4891         }
4892         case KVM_ENABLE_CAP:
4893         {
4894                 struct kvm_enable_cap cap;
4895                 r = -EFAULT;
4896                 if (copy_from_user(&cap, argp, sizeof(cap)))
4897                         break;
4898                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4899                 break;
4900         }
4901         case KVM_S390_MEM_OP: {
4902                 struct kvm_s390_mem_op mem_op;
4903
4904                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4905                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4906                 else
4907                         r = -EFAULT;
4908                 break;
4909         }
4910         case KVM_S390_SET_IRQ_STATE: {
4911                 struct kvm_s390_irq_state irq_state;
4912
4913                 r = -EFAULT;
4914                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4915                         break;
4916                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4917                     irq_state.len == 0 ||
4918                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4919                         r = -EINVAL;
4920                         break;
4921                 }
4922                 /* do not use irq_state.flags, it will break old QEMUs */
4923                 r = kvm_s390_set_irq_state(vcpu,
4924                                            (void __user *) irq_state.buf,
4925                                            irq_state.len);
4926                 break;
4927         }
4928         case KVM_S390_GET_IRQ_STATE: {
4929                 struct kvm_s390_irq_state irq_state;
4930
4931                 r = -EFAULT;
4932                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4933                         break;
4934                 if (irq_state.len == 0) {
4935                         r = -EINVAL;
4936                         break;
4937                 }
4938                 /* do not use irq_state.flags, it will break old QEMUs */
4939                 r = kvm_s390_get_irq_state(vcpu,
4940                                            (__u8 __user *)  irq_state.buf,
4941                                            irq_state.len);
4942                 break;
4943         }
4944         default:
4945                 r = -ENOTTY;
4946         }
4947
4948         vcpu_put(vcpu);
4949         return r;
4950 }
4951
4952 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4953 {
4954 #ifdef CONFIG_KVM_S390_UCONTROL
4955         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4956                  && (kvm_is_ucontrol(vcpu->kvm))) {
4957                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4958                 get_page(vmf->page);
4959                 return 0;
4960         }
4961 #endif
4962         return VM_FAULT_SIGBUS;
4963 }
4964
4965 /* Section: memory related */
4966 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4967                                    struct kvm_memory_slot *memslot,
4968                                    const struct kvm_userspace_memory_region *mem,
4969                                    enum kvm_mr_change change)
4970 {
4971         /* A few sanity checks. We can have memory slots which have to be
4972            located/ended at a segment boundary (1MB). The memory in userland is
4973            ok to be fragmented into various different vmas. It is okay to mmap()
4974            and munmap() stuff in this slot after doing this call at any time */
4975
4976         if (mem->userspace_addr & 0xffffful)
4977                 return -EINVAL;
4978
4979         if (mem->memory_size & 0xffffful)
4980                 return -EINVAL;
4981
4982         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4983                 return -EINVAL;
4984
4985         /* When we are protected, we should not change the memory slots */
4986         if (kvm_s390_pv_get_handle(kvm))
4987                 return -EINVAL;
4988         return 0;
4989 }
4990
4991 void kvm_arch_commit_memory_region(struct kvm *kvm,
4992                                 const struct kvm_userspace_memory_region *mem,
4993                                 struct kvm_memory_slot *old,
4994                                 const struct kvm_memory_slot *new,
4995                                 enum kvm_mr_change change)
4996 {
4997         int rc = 0;
4998
4999         switch (change) {
5000         case KVM_MR_DELETE:
5001                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5002                                         old->npages * PAGE_SIZE);
5003                 break;
5004         case KVM_MR_MOVE:
5005                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5006                                         old->npages * PAGE_SIZE);
5007                 if (rc)
5008                         break;
5009                 fallthrough;
5010         case KVM_MR_CREATE:
5011                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5012                                       mem->guest_phys_addr, mem->memory_size);
5013                 break;
5014         case KVM_MR_FLAGS_ONLY:
5015                 break;
5016         default:
5017                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5018         }
5019         if (rc)
5020                 pr_warn("failed to commit memory region\n");
5021         return;
5022 }
5023
5024 static inline unsigned long nonhyp_mask(int i)
5025 {
5026         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5027
5028         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5029 }
5030
5031 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5032 {
5033         vcpu->valid_wakeup = false;
5034 }
5035
5036 static int __init kvm_s390_init(void)
5037 {
5038         int i;
5039
5040         if (!sclp.has_sief2) {
5041                 pr_info("SIE is not available\n");
5042                 return -ENODEV;
5043         }
5044
5045         if (nested && hpage) {
5046                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5047                 return -EINVAL;
5048         }
5049
5050         for (i = 0; i < 16; i++)
5051                 kvm_s390_fac_base[i] |=
5052                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5053
5054         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5055 }
5056
5057 static void __exit kvm_s390_exit(void)
5058 {
5059         kvm_exit();
5060 }
5061
5062 module_init(kvm_s390_init);
5063 module_exit(kvm_s390_exit);
5064
5065 /*
5066  * Enable autoloading of the kvm module.
5067  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5068  * since x86 takes a different approach.
5069  */
5070 #include <linux/miscdevice.h>
5071 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5072 MODULE_ALIAS("devname:kvm");