// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT("userspace_handled", exit_userspace),
        VCPU_STAT("exit_null", exit_null),
        VCPU_STAT("pfault_sync", pfault_sync),
        VCPU_STAT("exit_validity", exit_validity),
        VCPU_STAT("exit_stop_request", exit_stop_request),
        VCPU_STAT("exit_external_request", exit_external_request),
        VCPU_STAT("exit_io_request", exit_io_request),
        VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
        VCPU_STAT("exit_instruction", exit_instruction),
        VCPU_STAT("exit_pei", exit_pei),
        VCPU_STAT("exit_program_interruption", exit_program_interruption),
        VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
        VCPU_STAT("exit_operation_exception", exit_operation_exception),
        VCPU_STAT("halt_successful_poll", halt_successful_poll),
        VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
        VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
        VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
        VCPU_STAT("halt_wakeup", halt_wakeup),
        VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
        VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
        VCPU_STAT("instruction_lctlg", instruction_lctlg),
        VCPU_STAT("instruction_lctl", instruction_lctl),
        VCPU_STAT("instruction_stctl", instruction_stctl),
        VCPU_STAT("instruction_stctg", instruction_stctg),
        VCPU_STAT("deliver_ckc", deliver_ckc),
        VCPU_STAT("deliver_cputm", deliver_cputm),
        VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
        VCPU_STAT("deliver_external_call", deliver_external_call),
        VCPU_STAT("deliver_service_signal", deliver_service_signal),
        VCPU_STAT("deliver_virtio", deliver_virtio),
        VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
        VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
        VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
        VCPU_STAT("deliver_program", deliver_program),
        VCPU_STAT("deliver_io", deliver_io),
        VCPU_STAT("deliver_machine_check", deliver_machine_check),
        VCPU_STAT("exit_wait_state", exit_wait_state),
        VCPU_STAT("inject_ckc", inject_ckc),
        VCPU_STAT("inject_cputm", inject_cputm),
        VCPU_STAT("inject_external_call", inject_external_call),
        VM_STAT("inject_float_mchk", inject_float_mchk),
        VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
        VM_STAT("inject_io", inject_io),
        VCPU_STAT("inject_mchk", inject_mchk),
        VM_STAT("inject_pfault_done", inject_pfault_done),
        VCPU_STAT("inject_program", inject_program),
        VCPU_STAT("inject_restart", inject_restart),
        VM_STAT("inject_service_signal", inject_service_signal),
        VCPU_STAT("inject_set_prefix", inject_set_prefix),
        VCPU_STAT("inject_stop_signal", inject_stop_signal),
        VCPU_STAT("inject_pfault_init", inject_pfault_init),
        VM_STAT("inject_virtio", inject_virtio),
        VCPU_STAT("instruction_epsw", instruction_epsw),
        VCPU_STAT("instruction_gs", instruction_gs),
        VCPU_STAT("instruction_io_other", instruction_io_other),
        VCPU_STAT("instruction_lpsw", instruction_lpsw),
        VCPU_STAT("instruction_lpswe", instruction_lpswe),
        VCPU_STAT("instruction_pfmf", instruction_pfmf),
        VCPU_STAT("instruction_ptff", instruction_ptff),
        VCPU_STAT("instruction_stidp", instruction_stidp),
        VCPU_STAT("instruction_sck", instruction_sck),
        VCPU_STAT("instruction_sckpf", instruction_sckpf),
        VCPU_STAT("instruction_spx", instruction_spx),
        VCPU_STAT("instruction_stpx", instruction_stpx),
        VCPU_STAT("instruction_stap", instruction_stap),
        VCPU_STAT("instruction_iske", instruction_iske),
        VCPU_STAT("instruction_ri", instruction_ri),
        VCPU_STAT("instruction_rrbe", instruction_rrbe),
        VCPU_STAT("instruction_sske", instruction_sske),
        VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
        VCPU_STAT("instruction_essa", instruction_essa),
        VCPU_STAT("instruction_stsi", instruction_stsi),
        VCPU_STAT("instruction_stfl", instruction_stfl),
        VCPU_STAT("instruction_tb", instruction_tb),
        VCPU_STAT("instruction_tpi", instruction_tpi),
        VCPU_STAT("instruction_tprot", instruction_tprot),
        VCPU_STAT("instruction_tsch", instruction_tsch),
        VCPU_STAT("instruction_sthyi", instruction_sthyi),
        VCPU_STAT("instruction_sie", instruction_sie),
        VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
        VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
        VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
        VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
        VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
        VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
        VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
        VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
        VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
        VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
        VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
        VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
        VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
        VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
        VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
        VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
        VCPU_STAT("instruction_diag_10", diagnose_10),
        VCPU_STAT("instruction_diag_44", diagnose_44),
        VCPU_STAT("instruction_diag_9c", diagnose_9c),
        VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
        VCPU_STAT("instruction_diag_258", diagnose_258),
        VCPU_STAT("instruction_diag_308", diagnose_308),
        VCPU_STAT("instruction_diag_500", diagnose_500),
        VCPU_STAT("instruction_diag_other", diagnose_other),
        { NULL }
};

struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;
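/*
 * Editorial note: this matches the 16-byte operand of STORE CLOCK
 * EXTENDED as read by get_tod_clock_ext() further down: one epoch-index
 * byte, the 64-bit TOD value, and trailing bytes that are not used
 * here, hence __packed so that no padding is inserted after epoch_idx.
 */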

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, it requires changes to the code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines the default mask for facilities. Consists of
 * the defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                sizeof(S390_lowcore.stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
        return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta, we have to compensate this by adding
         * -delta to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}
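/*
 * Worked example (editorial): treat (epdx:epoch) as one 128-bit value.
 * If the host TOD jumped forward by 1, we add delta = -1 (all bits set)
 * to the low word and delta_idx = -1 to the high word. With an old
 * epoch of 5 the new epoch is 4 and "epoch < delta" holds, so the +1
 * carry cancels the -1 and epdx is unchanged, as expected. With an old
 * epoch of 0 the new epoch is 0xfff...f, the carry check fails, and
 * epdx absorbs the borrow. This is plain 128-bit addition carried out
 * in two 64-bit halves.
 */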

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}
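/*
 * Editorial sketch: with 0x100 or'ed into the function code in r0, PLO
 * performs its "test bit" function, so only the resulting condition
 * code matters; cc == 0 means function code nr is installed.
 * kvm_s390_cpu_feat_init() below loops over all 256 codes and records
 * each available one, MSB first, in the query-style bitmap:
 *
 *      if (plo_test_bit(i))
 *              plo[i >> 3] |= 0x80 >> (i & 7);
 */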

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
        register unsigned long r0 asm("0") = 0; /* query function */
        register unsigned long r1 asm("1") = (unsigned long) query;

        asm volatile(
                /* Parameter regs are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
                :
                : "d" (r0), "a" (r1), [opc] "i" (opcode)
                : "cc", "memory");
}
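/*
 * Editorial note: ".insn rrf" assembles a raw RRF-format instruction
 * from the 16-bit opcode shifted into the high halfword; the register
 * fields 2, 4 and 6 are dummies because the query function (GR0 == 0)
 * ignores them and only stores its result bitmap at the address in GR1.
 */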

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (test_facility(155)) /* MSA9 */
                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kdsa);

        if (test_facility(150)) /* SORTL */
                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

        if (test_facility(151)) /* DFLTCC */
                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
         * pages to be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        int rc = -ENOMEM;

        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf_uv)
                goto out;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
            debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
                goto out;

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
        if (rc) {
                pr_err("A FLIC registration call failed with rc=%d\n", rc);
                goto out;
        }

        rc = kvm_s390_gib_init(GAL_ISC);
        if (rc)
                goto out;

        return 0;

out:
        kvm_arch_exit();
        return rc;
}

void kvm_arch_exit(void)
{
        kvm_s390_gib_destroy();
        debug_unregister(kvm_s390_dbf);
        debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}
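/*
 * Userspace sketch (editorial, not part of this file's build): the one
 * device ioctl handled above is issued on the /dev/kvm fd, typically
 * once before creating a VM:
 *
 *      int kvm_fd = open("/dev/kvm", O_RDWR);
 *
 *      if (kvm_fd < 0 || ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0) < 0)
 *              perror("KVM_S390_ENABLE_SIE");
 */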

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
        case KVM_CAP_SET_GUEST_DEBUG:
        case KVM_CAP_S390_DIAG318:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
        case KVM_CAP_MAX_VCPU_ID:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        case KVM_CAP_S390_PROTECTED:
                r = is_prot_virt_host();
                break;
        default:
                r = 0;
        }
        return r;
}
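/*
 * Userspace sketch (editorial): extensions are probed with
 * KVM_CHECK_EXTENSION; note that some answers above are not booleans,
 * e.g. KVM_CAP_S390_MEM_OP reports the maximum transfer size:
 *
 *      int max_memop = ioctl(vm_fd, KVM_CHECK_EXTENSION,
 *                            KVM_CAP_S390_MEM_OP);
 *      // max_memop == 65536 (MEM_OP_MAX_SIZE) when the interface exists
 */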

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}
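/*
 * Userspace sketch (editorial): retrieving (and thereby clearing) the
 * dirty bitmap of slot 0, assuming bitmap_buf was sized by user space
 * as one bit per page of the slot:
 *
 *      struct kvm_dirty_log log = {
 *              .slot = 0,
 *              .dirty_bitmap = bitmap_buf,
 *      };
 *
 *      if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *              perror("KVM_GET_DIRTY_LOG");
 */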

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        if (test_facility(148)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
                                set_kvm_facility(kvm->arch.model.fac_list, 148);
                        }
                        if (test_facility(152)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
                                set_kvm_facility(kvm->arch.model.fac_list, 152);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;
                        mmap_write_lock(kvm->mm);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        mmap_write_unlock(kvm->mm);
                        /*
                         * We might have to create fake 4k page
                         * tables. To prevent the hardware from working
                         * on stale PGSTEs, we emulate these
                         * instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}
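/*
 * Userspace sketch (editorial): VM capabilities are switched on via
 * KVM_ENABLE_CAP on the VM fd. Capabilities that alter the CPU model
 * (vector registers, RI, GS, ...) must be enabled before the first
 * VCPU is created, see the -EBUSY paths above:
 *
 *      struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *      if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *              perror("KVM_ENABLE_CAP");
 */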

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* Not compatible with cmma. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
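/*
 * Userspace sketch (editorial): the attributes above are reached
 * through KVM_SET_DEVICE_ATTR on the VM fd, e.g. lowering the guest
 * memory limit (a hypothetical 2 GiB here), which is only accepted
 * before the first VCPU is created:
 *
 *      __u64 limit = 1ULL << 31;
 *      struct kvm_device_attr attr = {
 *              .group  = KVM_S390_VM_MEM_CTRL,
 *              .attr   = KVM_S390_VM_MEM_LIMIT_SIZE,
 *              .addr   = (__u64)&limit,
 *      };
 *
 *      if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
 *              perror("KVM_SET_DEVICE_ATTR");
 */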

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                /* recreate the shadow crycb by leaving the VSIE handler */
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 0;
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                ms = slots->memslots + slotnr;
                if (!ms->dirty_bitmap)
                        return -EINVAL;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
                if (gtod->tod < htod.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
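/*
 * Userspace sketch (editorial): reading the extended guest TOD (epoch
 * index plus base value) through the corresponding VM device attribute:
 *
 *      struct kvm_s390_vm_tod_clock gtod;
 *      struct kvm_device_attr attr = {
 *              .group  = KVM_S390_VM_TOD,
 *              .attr   = KVM_S390_VM_TOD_EXT,
 *              .addr   = (__u64)&gtod,
 *      };
 *
 *      if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr) < 0)
 *              perror("KVM_GET_DEVICE_ATTR");
 */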

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                mutex_unlock(&kvm->lock);
                return -EBUSY;
        }
        bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
                    KVM_S390_VM_CPU_FEAT_NR_BITS);
        mutex_unlock(&kvm->lock);
        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                mutex_unlock(&kvm->lock);
                return -EBUSY;
        }

        if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
                           sizeof(struct kvm_s390_vm_cpu_subfunc))) {
                mutex_unlock(&kvm->lock);
                return -EFAULT;
        }
        mutex_unlock(&kvm->lock);

        VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
        VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
        VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
        VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
        VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
        VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
        VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
        VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
        VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
        VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
        VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

        return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                 kvm->arch.model.ibc,
                 kvm->arch.model.cpuid);
        VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                 kvm->arch.model.fac_list[0],
                 kvm->arch.model.fac_list[1],
                 kvm->arch.model.fac_list[2]);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
out:
1440         return ret;
1441 }
1442
1443 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1444 {
1445         struct kvm_s390_vm_cpu_machine *mach;
1446         int ret = 0;
1447
1448         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1449         if (!mach) {
1450                 ret = -ENOMEM;
1451                 goto out;
1452         }
1453         get_cpu_id((struct cpuid *) &mach->cpuid);
1454         mach->ibc = sclp.ibc;
1455         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1456                S390_ARCH_FAC_LIST_SIZE_BYTE);
1457         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1458                sizeof(S390_lowcore.stfle_fac_list));
1459         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1460                  kvm->arch.model.ibc,
1461                  kvm->arch.model.cpuid);
1462         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1463                  mach->fac_mask[0],
1464                  mach->fac_mask[1],
1465                  mach->fac_mask[2]);
1466         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1467                  mach->fac_list[0],
1468                  mach->fac_list[1],
1469                  mach->fac_list[2]);
1470         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1471                 ret = -EFAULT;
1472         kfree(mach);
1473 out:
1474         return ret;
1475 }
1476
1477 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1478                                        struct kvm_device_attr *attr)
1479 {
1480         struct kvm_s390_vm_cpu_feat data;
1481
1482         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1483                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1484         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1485                 return -EFAULT;
1486         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1487                          data.feat[0],
1488                          data.feat[1],
1489                          data.feat[2]);
1490         return 0;
1491 }
1492
1493 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1494                                      struct kvm_device_attr *attr)
1495 {
1496         struct kvm_s390_vm_cpu_feat data;
1497
1498         bitmap_copy((unsigned long *) data.feat,
1499                     kvm_s390_available_cpu_feat,
1500                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1501         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1502                 return -EFAULT;
1503         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1504                          data.feat[0],
1505                          data.feat[1],
1506                          data.feat[2]);
1507         return 0;
1508 }
1509
1510 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1511                                           struct kvm_device_attr *attr)
1512 {
1513         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1514             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1515                 return -EFAULT;
1516
1517         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1522         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1525         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1528         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1531         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1534         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1537         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1540         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1543         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1546         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1549         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1552         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1555         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1558         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1561         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1564         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1568                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1569         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1573                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1574
1575         return 0;
1576 }
1577
1578 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1579                                         struct kvm_device_attr *attr)
1580 {
1581         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1582             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1583                 return -EFAULT;
1584
1585         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1586                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1587                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1588                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1589                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1590         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1591                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1592                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1593         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1594                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1595                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1596         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1597                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1598                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1599         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1600                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1601                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1602         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1604                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1605         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1606                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1607                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1608         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1609                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1610                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1611         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1612                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1614         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1617         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1620         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1623         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1624                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1625                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1626         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1627                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1629         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1630                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1631                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1632         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1633                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1634                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1635                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1636                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1637         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1638                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1639                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1640                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1641                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1642
1643         return 0;
1644 }
1645
1646 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1647 {
1648         int ret = -ENXIO;
1649
1650         switch (attr->attr) {
1651         case KVM_S390_VM_CPU_PROCESSOR:
1652                 ret = kvm_s390_get_processor(kvm, attr);
1653                 break;
1654         case KVM_S390_VM_CPU_MACHINE:
1655                 ret = kvm_s390_get_machine(kvm, attr);
1656                 break;
1657         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1658                 ret = kvm_s390_get_processor_feat(kvm, attr);
1659                 break;
1660         case KVM_S390_VM_CPU_MACHINE_FEAT:
1661                 ret = kvm_s390_get_machine_feat(kvm, attr);
1662                 break;
1663         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1664                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1665                 break;
1666         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1667                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1668                 break;
1669         }
1670         return ret;
1671 }
1672
1673 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1674 {
1675         int ret;
1676
1677         switch (attr->group) {
1678         case KVM_S390_VM_MEM_CTRL:
1679                 ret = kvm_s390_set_mem_control(kvm, attr);
1680                 break;
1681         case KVM_S390_VM_TOD:
1682                 ret = kvm_s390_set_tod(kvm, attr);
1683                 break;
1684         case KVM_S390_VM_CPU_MODEL:
1685                 ret = kvm_s390_set_cpu_model(kvm, attr);
1686                 break;
1687         case KVM_S390_VM_CRYPTO:
1688                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1689                 break;
1690         case KVM_S390_VM_MIGRATION:
1691                 ret = kvm_s390_vm_set_migration(kvm, attr);
1692                 break;
1693         default:
1694                 ret = -ENXIO;
1695                 break;
1696         }
1697
1698         return ret;
1699 }
1700
1701 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1702 {
1703         int ret;
1704
1705         switch (attr->group) {
1706         case KVM_S390_VM_MEM_CTRL:
1707                 ret = kvm_s390_get_mem_control(kvm, attr);
1708                 break;
1709         case KVM_S390_VM_TOD:
1710                 ret = kvm_s390_get_tod(kvm, attr);
1711                 break;
1712         case KVM_S390_VM_CPU_MODEL:
1713                 ret = kvm_s390_get_cpu_model(kvm, attr);
1714                 break;
1715         case KVM_S390_VM_MIGRATION:
1716                 ret = kvm_s390_vm_get_migration(kvm, attr);
1717                 break;
1718         default:
1719                 ret = -ENXIO;
1720                 break;
1721         }
1722
1723         return ret;
1724 }
1725
1726 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728         int ret;
1729
1730         switch (attr->group) {
1731         case KVM_S390_VM_MEM_CTRL:
1732                 switch (attr->attr) {
1733                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1734                 case KVM_S390_VM_MEM_CLR_CMMA:
1735                         ret = sclp.has_cmma ? 0 : -ENXIO;
1736                         break;
1737                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1738                         ret = 0;
1739                         break;
1740                 default:
1741                         ret = -ENXIO;
1742                         break;
1743                 }
1744                 break;
1745         case KVM_S390_VM_TOD:
1746                 switch (attr->attr) {
1747                 case KVM_S390_VM_TOD_LOW:
1748                 case KVM_S390_VM_TOD_HIGH:
1749                         ret = 0;
1750                         break;
1751                 default:
1752                         ret = -ENXIO;
1753                         break;
1754                 }
1755                 break;
1756         case KVM_S390_VM_CPU_MODEL:
1757                 switch (attr->attr) {
1758                 case KVM_S390_VM_CPU_PROCESSOR:
1759                 case KVM_S390_VM_CPU_MACHINE:
1760                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1761                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1762                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1763                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1764                         ret = 0;
1765                         break;
1766                 default:
1767                         ret = -ENXIO;
1768                         break;
1769                 }
1770                 break;
1771         case KVM_S390_VM_CRYPTO:
1772                 switch (attr->attr) {
1773                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1774                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1775                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1776                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1777                         ret = 0;
1778                         break;
1779                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1780                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1781                         ret = ap_instructions_available() ? 0 : -ENXIO;
1782                         break;
1783                 default:
1784                         ret = -ENXIO;
1785                         break;
1786                 }
1787                 break;
1788         case KVM_S390_VM_MIGRATION:
1789                 ret = 0;
1790                 break;
1791         default:
1792                 ret = -ENXIO;
1793                 break;
1794         }
1795
1796         return ret;
1797 }
1798
1799 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1800 {
1801         uint8_t *keys;
1802         uint64_t hva;
1803         int srcu_idx, i, r = 0;
1804
1805         if (args->flags != 0)
1806                 return -EINVAL;
1807
1808         /* Is this guest using storage keys? */
1809         if (!mm_uses_skeys(current->mm))
1810                 return KVM_S390_GET_SKEYS_NONE;
1811
1812         /* Enforce sane limit on memory allocation */
1813         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1814                 return -EINVAL;
1815
1816         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1817         if (!keys)
1818                 return -ENOMEM;
1819
1820         mmap_read_lock(current->mm);
1821         srcu_idx = srcu_read_lock(&kvm->srcu);
1822         for (i = 0; i < args->count; i++) {
1823                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1824                 if (kvm_is_error_hva(hva)) {
1825                         r = -EFAULT;
1826                         break;
1827                 }
1828
1829                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1830                 if (r)
1831                         break;
1832         }
1833         srcu_read_unlock(&kvm->srcu, srcu_idx);
1834         mmap_read_unlock(current->mm);
1835
1836         if (!r) {
1837                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1838                                  sizeof(uint8_t) * args->count);
1839                 if (r)
1840                         r = -EFAULT;
1841         }
1842
1843         kvfree(keys);
1844         return r;
1845 }
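/*
 * Minimal userspace sketch of the read side (illustrative only; vm_fd is
 * assumed to be an open KVM VM file descriptor and error handling is
 * omitted):
 *
 *	__u8 keys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = sizeof(keys),
 *		.skeydata_addr = (__u64)(uintptr_t)keys,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * A return value of KVM_S390_GET_SKEYS_NONE means the guest never enabled
 * storage keys and nothing was copied.
 */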
1846
1847 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1848 {
1849         uint8_t *keys;
1850         uint64_t hva;
1851         int srcu_idx, i, r = 0;
1852         bool unlocked;
1853
1854         if (args->flags != 0)
1855                 return -EINVAL;
1856
1857         /* Enforce sane limit on memory allocation */
1858         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1859                 return -EINVAL;
1860
1861         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1862         if (!keys)
1863                 return -ENOMEM;
1864
1865         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1866                            sizeof(uint8_t) * args->count);
1867         if (r) {
1868                 r = -EFAULT;
1869                 goto out;
1870         }
1871
1872         /* Enable storage key handling for the guest */
1873         r = s390_enable_skey();
1874         if (r)
1875                 goto out;
1876
1877         i = 0;
1878         mmap_read_lock(current->mm);
1879         srcu_idx = srcu_read_lock(&kvm->srcu);
1880         while (i < args->count) {
1881                 unlocked = false;
1882                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1883                 if (kvm_is_error_hva(hva)) {
1884                         r = -EFAULT;
1885                         break;
1886                 }
1887
1888                 /* Lowest order bit is reserved */
1889                 if (keys[i] & 0x01) {
1890                         r = -EINVAL;
1891                         break;
1892                 }
1893
1894                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1895                 if (r) {
1896                         r = fixup_user_fault(current->mm, hva,
1897                                              FAULT_FLAG_WRITE, &unlocked);
1898                         if (r)
1899                                 break;
1900                 }
1901                 if (!r)
1902                         i++;
1903         }
1904         srcu_read_unlock(&kvm->srcu, srcu_idx);
1905         mmap_read_unlock(current->mm);
1906 out:
1907         kvfree(keys);
1908         return r;
1909 }
1910
1911 /*
1912  * Base address and length must be sent at the start of each block; it is
1913  * therefore cheaper to also send a short run of clean data, as long as the
1914  * run is smaller than the size of two longs.
1915  */
1916 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1917 /* for consistency, the same limit as for storage keys */
1918 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
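/*
 * Worked example (illustrative, not from the original source): on s390x,
 * sizeof(void *) == 8, so KVM_S390_MAX_BIT_DISTANCE is 16. Each CMMA value
 * occupies one byte in the output buffer, while starting a new block costs a
 * fresh base/length pair, i.e. two longs (16 bytes). A run of up to 16 clean
 * values is therefore never more expensive to send inline than to split the
 * result into two blocks.
 */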
1919
1920 /*
1921  * Similar to gfn_to_memslot, but it also returns a memslot index when the
1922  * address falls in a hole; in that case, the index of one of the memslots
1923  * bordering the hole is returned.
1924  */
1925 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1926 {
1927         int start = 0, end = slots->used_slots;
1928         int slot = atomic_read(&slots->lru_slot);
1929         struct kvm_memory_slot *memslots = slots->memslots;
1930
1931         if (gfn >= memslots[slot].base_gfn &&
1932             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1933                 return slot;
1934
1935         while (start < end) {
1936                 slot = start + (end - start) / 2;
1937
1938                 if (gfn >= memslots[slot].base_gfn)
1939                         end = slot;
1940                 else
1941                         start = slot + 1;
1942         }
1943
1944         if (start >= slots->used_slots)
1945                 return slots->used_slots - 1;
1946
1947         if (gfn >= memslots[start].base_gfn &&
1948             gfn < memslots[start].base_gfn + memslots[start].npages) {
1949                 atomic_set(&slots->lru_slot, start);
1950         }
1951
1952         return start;
1953 }
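/*
 * Illustrative example (assumes two memslots; slots->memslots is sorted by
 * descending base_gfn): slot 0 covers gfns [0x200, 0x300) and slot 1 covers
 * [0x000, 0x100). For gfn 0x180, which lies in the hole, the binary search
 * above terminates with start == 1 and the final range check fails, so the
 * index of the bordering slot below the hole (1) is returned instead of an
 * error.
 */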
1954
1955 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1956                               u8 *res, unsigned long bufsize)
1957 {
1958         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1959
1960         args->count = 0;
1961         while (args->count < bufsize) {
1962                 hva = gfn_to_hva(kvm, cur_gfn);
1963                 /*
1964                  * We return an error if the first value was invalid, but we
1965                  * return successfully if at least one value was copied.
1966                  */
1967                 if (kvm_is_error_hva(hva))
1968                         return args->count ? 0 : -EFAULT;
1969                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1970                         pgstev = 0;
1971                 res[args->count++] = (pgstev >> 24) & 0x43;
1972                 cur_gfn++;
1973         }
1974
1975         return 0;
1976 }
1977
1978 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1979                                               unsigned long cur_gfn)
1980 {
1981         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1982         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1983         unsigned long ofs = cur_gfn - ms->base_gfn;
1984
1985         if (ms->base_gfn + ms->npages <= cur_gfn) {
1986                 slotidx--;
1987                 /* If we are above the highest slot, wrap around */
1988                 if (slotidx < 0)
1989                         slotidx = slots->used_slots - 1;
1990
1991                 ms = slots->memslots + slotidx;
1992                 ofs = 0;
1993         }
1994         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1995         while ((slotidx > 0) && (ofs >= ms->npages)) {
1996                 slotidx--;
1997                 ms = slots->memslots + slotidx;
1998                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1999         }
2000         return ms->base_gfn + ofs;
2001 }
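/*
 * Illustrative note (not from the original source): because memslots are
 * sorted by descending base_gfn, decrementing slotidx moves the scan towards
 * higher guest addresses. E.g. with slot 1 = [0x000, 0x100) and slot 0 =
 * [0x200, 0x300), a cur_gfn of 0x100 has run past slot 1, so the search
 * restarts at offset 0 of slot 0; if no bit is set there either, a gfn past
 * the end of memory is returned, which the caller interprets as "no more
 * dirty pages".
 */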
2002
2003 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2004                              u8 *res, unsigned long bufsize)
2005 {
2006         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2007         struct kvm_memslots *slots = kvm_memslots(kvm);
2008         struct kvm_memory_slot *ms;
2009
2010         if (unlikely(!slots->used_slots))
2011                 return 0;
2012
2013         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2014         ms = gfn_to_memslot(kvm, cur_gfn);
2015         args->count = 0;
2016         args->start_gfn = cur_gfn;
2017         if (!ms)
2018                 return 0;
2019         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2020         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2021
2022         while (args->count < bufsize) {
2023                 hva = gfn_to_hva(kvm, cur_gfn);
2024                 if (kvm_is_error_hva(hva))
2025                         return 0;
2026                 /* Decrement only if we actually flipped the bit to 0 */
2027                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2028                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2029                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2030                         pgstev = 0;
2031                 /* Save the value */
2032                 res[args->count++] = (pgstev >> 24) & 0x43;
2033                 /* If the next bit is too far away, stop. */
2034                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2035                         return 0;
2036                 /* If we reached the previous "next", find the next one */
2037                 if (cur_gfn == next_gfn)
2038                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2039                 /* Reached the end of memory or of the buffer, stop */
2040                 if ((next_gfn >= mem_end) ||
2041                     (next_gfn - args->start_gfn >= bufsize))
2042                         return 0;
2043                 cur_gfn++;
2044                 /* Reached the end of the current memslot, take the next one. */
2045                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2046                         ms = gfn_to_memslot(kvm, cur_gfn);
2047                         if (!ms)
2048                                 return 0;
2049                 }
2050         }
2051         return 0;
2052 }
2053
2054 /*
2055  * This function searches for the next page with dirty CMMA attributes, and
2056  * saves the attributes in the buffer up to either the end of the buffer or
2057  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2058  * no trailing clean bytes are saved.
2059  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2060  * output buffer will indicate 0 as length.
2061  */
2062 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2063                                   struct kvm_s390_cmma_log *args)
2064 {
2065         unsigned long bufsize;
2066         int srcu_idx, peek, ret;
2067         u8 *values;
2068
2069         if (!kvm->arch.use_cmma)
2070                 return -ENXIO;
2071         /* Invalid/unsupported flags were specified */
2072         if (args->flags & ~KVM_S390_CMMA_PEEK)
2073                 return -EINVAL;
2074         /* Migration mode query, and we are not doing a migration */
2075         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2076         if (!peek && !kvm->arch.migration_mode)
2077                 return -EINVAL;
2078         /* CMMA is disabled or was not used, or the buffer has length zero */
2079         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2080         if (!bufsize || !kvm->mm->context.uses_cmm) {
2081                 memset(args, 0, sizeof(*args));
2082                 return 0;
2083         }
2084         /* We are not peeking, and there are no dirty pages */
2085         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2086                 memset(args, 0, sizeof(*args));
2087                 return 0;
2088         }
2089
2090         values = vmalloc(bufsize);
2091         if (!values)
2092                 return -ENOMEM;
2093
2094         mmap_read_lock(kvm->mm);
2095         srcu_idx = srcu_read_lock(&kvm->srcu);
2096         if (peek)
2097                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2098         else
2099                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2100         srcu_read_unlock(&kvm->srcu, srcu_idx);
2101         mmap_read_unlock(kvm->mm);
2102
2103         if (kvm->arch.migration_mode)
2104                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2105         else
2106                 args->remaining = 0;
2107
2108         if (copy_to_user((void __user *)args->values, values, args->count))
2109                 ret = -EFAULT;
2110
2111         vfree(values);
2112         return ret;
2113 }
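/*
 * Minimal userspace sketch of this interface in peek mode (illustrative
 * only; vm_fd is assumed to be an open KVM VM file descriptor and all error
 * handling is omitted):
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(uintptr_t)buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On return, log.count holds the number of values written; each byte is
 * (pgstev >> 24) & 0x43 for one consecutive gfn starting at log.start_gfn.
 */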
2114
2115 /*
2116  * This function sets the CMMA attributes for the given pages. If the input
2117  * buffer has zero length, no action is taken, otherwise the attributes are
2118  * set and the mm->context.uses_cmm flag is set.
2119  */
2120 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2121                                   const struct kvm_s390_cmma_log *args)
2122 {
2123         unsigned long hva, mask, pgstev, i;
2124         uint8_t *bits;
2125         int srcu_idx, r = 0;
2126
2127         mask = args->mask;
2128
2129         if (!kvm->arch.use_cmma)
2130                 return -ENXIO;
2131         /* invalid/unsupported flags */
2132         if (args->flags != 0)
2133                 return -EINVAL;
2134         /* Enforce sane limit on memory allocation */
2135         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2136                 return -EINVAL;
2137         /* Nothing to do */
2138         if (args->count == 0)
2139                 return 0;
2140
2141         bits = vmalloc(array_size(sizeof(*bits), args->count));
2142         if (!bits)
2143                 return -ENOMEM;
2144
2145         r = copy_from_user(bits, (void __user *)args->values, args->count);
2146         if (r) {
2147                 r = -EFAULT;
2148                 goto out;
2149         }
2150
2151         mmap_read_lock(kvm->mm);
2152         srcu_idx = srcu_read_lock(&kvm->srcu);
2153         for (i = 0; i < args->count; i++) {
2154                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2155                 if (kvm_is_error_hva(hva)) {
2156                         r = -EFAULT;
2157                         break;
2158                 }
2159
2160                 pgstev = bits[i];
2161                 pgstev = pgstev << 24;
2162                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2163                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2164         }
2165         srcu_read_unlock(&kvm->srcu, srcu_idx);
2166         mmap_read_unlock(kvm->mm);
2167
2168         if (!kvm->mm->context.uses_cmm) {
2169                 mmap_write_lock(kvm->mm);
2170                 kvm->mm->context.uses_cmm = 1;
2171                 mmap_write_unlock(kvm->mm);
2172         }
2173 out:
2174         vfree(bits);
2175         return r;
2176 }
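/*
 * Worked example (illustrative): a value byte of 0x41 ends up as 0x41 << 24
 * in pgstev, i.e. in the PGSTE bit positions, and the caller-supplied mask
 * is clamped to _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT before
 * set_pgste_bits() runs, so only the usage state and the NODAT bit can be
 * modified here, no matter what userspace put into args->mask.
 */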
2177
2178 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2179 {
2180         struct kvm_vcpu *vcpu;
2181         u16 rc, rrc;
2182         int ret = 0;
2183         int i;
2184
2185         /*
2186          * We ignore failures and try to destroy as many CPUs as possible.
2187          * At the same time we must not free the assigned resources when
2188          * this fails, as the ultravisor still has access to that memory.
2189          * So kvm_s390_pv_destroy_cpu can intentionally leave a "wanted"
2190          * memory leak behind.
2191          * We still want to return the rc and rrc of the first failure.
2192          */
2193         kvm_for_each_vcpu(i, vcpu, kvm) {
2194                 mutex_lock(&vcpu->mutex);
2195                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2196                         *rcp = rc;
2197                         *rrcp = rrc;
2198                         ret = -EIO;
2199                 }
2200                 mutex_unlock(&vcpu->mutex);
2201         }
2202         return ret;
2203 }
2204
2205 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2206 {
2207         int i, r = 0;
2208         u16 dummy;
2209
2210         struct kvm_vcpu *vcpu;
2211
2212         kvm_for_each_vcpu(i, vcpu, kvm) {
2213                 mutex_lock(&vcpu->mutex);
2214                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2215                 mutex_unlock(&vcpu->mutex);
2216                 if (r)
2217                         break;
2218         }
2219         if (r)
2220                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2221         return r;
2222 }
2223
2224 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2225 {
2226         int r = 0;
2227         u16 dummy;
2228         void __user *argp = (void __user *)cmd->data;
2229
2230         switch (cmd->cmd) {
2231         case KVM_PV_ENABLE: {
2232                 r = -EINVAL;
2233                 if (kvm_s390_pv_is_protected(kvm))
2234                         break;
2235
2236                 /*
2237                  *  FMT 4 SIE needs esca. As we never switch back from esca
2238                  *  to bsca, we need no cleanup in the error cases below.
2239                  */
2240                 r = sca_switch_to_extended(kvm);
2241                 if (r)
2242                         break;
2243
2244                 mmap_write_lock(current->mm);
2245                 r = gmap_mark_unmergeable();
2246                 mmap_write_unlock(current->mm);
2247                 if (r)
2248                         break;
2249
2250                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2251                 if (r)
2252                         break;
2253
2254                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2255                 if (r)
2256                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2257
2258                 /* we need to block service interrupts from now on */
2259                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2260                 break;
2261         }
2262         case KVM_PV_DISABLE: {
2263                 r = -EINVAL;
2264                 if (!kvm_s390_pv_is_protected(kvm))
2265                         break;
2266
2267                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2268                 /*
2269                  * If a CPU could not be destroyed, destroying the VM will
2270                  * also fail, so there is no point in trying. Instead, return
2271                  * the rc and rrc of the first CPU that failed to be destroyed.
2272                  */
2273                 if (r)
2274                         break;
2275                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2276
2277                 /* no need to block service interrupts any more */
2278                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2279                 break;
2280         }
2281         case KVM_PV_SET_SEC_PARMS: {
2282                 struct kvm_s390_pv_sec_parm parms = {};
2283                 void *hdr;
2284
2285                 r = -EINVAL;
2286                 if (!kvm_s390_pv_is_protected(kvm))
2287                         break;
2288
2289                 r = -EFAULT;
2290                 if (copy_from_user(&parms, argp, sizeof(parms)))
2291                         break;
2292
2293                 /* Currently restricted to 8KB */
2294                 r = -EINVAL;
2295                 if (parms.length > PAGE_SIZE * 2)
2296                         break;
2297
2298                 r = -ENOMEM;
2299                 hdr = vmalloc(parms.length);
2300                 if (!hdr)
2301                         break;
2302
2303                 r = -EFAULT;
2304                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2305                                     parms.length))
2306                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2307                                                       &cmd->rc, &cmd->rrc);
2308
2309                 vfree(hdr);
2310                 break;
2311         }
2312         case KVM_PV_UNPACK: {
2313                 struct kvm_s390_pv_unp unp = {};
2314
2315                 r = -EINVAL;
2316                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2317                         break;
2318
2319                 r = -EFAULT;
2320                 if (copy_from_user(&unp, argp, sizeof(unp)))
2321                         break;
2322
2323                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2324                                        &cmd->rc, &cmd->rrc);
2325                 break;
2326         }
2327         case KVM_PV_VERIFY: {
2328                 r = -EINVAL;
2329                 if (!kvm_s390_pv_is_protected(kvm))
2330                         break;
2331
2332                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2333                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2334                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2335                              cmd->rrc);
2336                 break;
2337         }
2338         case KVM_PV_PREP_RESET: {
2339                 r = -EINVAL;
2340                 if (!kvm_s390_pv_is_protected(kvm))
2341                         break;
2342
2343                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2344                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2345                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2346                              cmd->rc, cmd->rrc);
2347                 break;
2348         }
2349         case KVM_PV_UNSHARE_ALL: {
2350                 r = -EINVAL;
2351                 if (!kvm_s390_pv_is_protected(kvm))
2352                         break;
2353
2354                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2355                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2356                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2357                              cmd->rc, cmd->rrc);
2358                 break;
2359         }
2360         default:
2361                 r = -ENOTTY;
2362         }
2363         return r;
2364 }
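/*
 * Minimal userspace sketch of the transition to protected mode (illustrative
 * only; vm_fd is assumed to be an open KVM VM file descriptor and error
 * handling is omitted):
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *
 * On failure, cmd.rc and cmd.rrc carry the ultravisor return codes. A
 * typical sequence then stages the encrypted image with
 * KVM_PV_SET_SEC_PARMS and KVM_PV_UNPACK before letting the ultravisor
 * check it with KVM_PV_VERIFY.
 */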
2365
2366 long kvm_arch_vm_ioctl(struct file *filp,
2367                        unsigned int ioctl, unsigned long arg)
2368 {
2369         struct kvm *kvm = filp->private_data;
2370         void __user *argp = (void __user *)arg;
2371         struct kvm_device_attr attr;
2372         int r;
2373
2374         switch (ioctl) {
2375         case KVM_S390_INTERRUPT: {
2376                 struct kvm_s390_interrupt s390int;
2377
2378                 r = -EFAULT;
2379                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2380                         break;
2381                 r = kvm_s390_inject_vm(kvm, &s390int);
2382                 break;
2383         }
2384         case KVM_CREATE_IRQCHIP: {
2385                 struct kvm_irq_routing_entry routing;
2386
2387                 r = -EINVAL;
2388                 if (kvm->arch.use_irqchip) {
2389                         /* Set up dummy routing. */
2390                         memset(&routing, 0, sizeof(routing));
2391                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2392                 }
2393                 break;
2394         }
2395         case KVM_SET_DEVICE_ATTR: {
2396                 r = -EFAULT;
2397                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2398                         break;
2399                 r = kvm_s390_vm_set_attr(kvm, &attr);
2400                 break;
2401         }
2402         case KVM_GET_DEVICE_ATTR: {
2403                 r = -EFAULT;
2404                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2405                         break;
2406                 r = kvm_s390_vm_get_attr(kvm, &attr);
2407                 break;
2408         }
2409         case KVM_HAS_DEVICE_ATTR: {
2410                 r = -EFAULT;
2411                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2412                         break;
2413                 r = kvm_s390_vm_has_attr(kvm, &attr);
2414                 break;
2415         }
2416         case KVM_S390_GET_SKEYS: {
2417                 struct kvm_s390_skeys args;
2418
2419                 r = -EFAULT;
2420                 if (copy_from_user(&args, argp,
2421                                    sizeof(struct kvm_s390_skeys)))
2422                         break;
2423                 r = kvm_s390_get_skeys(kvm, &args);
2424                 break;
2425         }
2426         case KVM_S390_SET_SKEYS: {
2427                 struct kvm_s390_skeys args;
2428
2429                 r = -EFAULT;
2430                 if (copy_from_user(&args, argp,
2431                                    sizeof(struct kvm_s390_skeys)))
2432                         break;
2433                 r = kvm_s390_set_skeys(kvm, &args);
2434                 break;
2435         }
2436         case KVM_S390_GET_CMMA_BITS: {
2437                 struct kvm_s390_cmma_log args;
2438
2439                 r = -EFAULT;
2440                 if (copy_from_user(&args, argp, sizeof(args)))
2441                         break;
2442                 mutex_lock(&kvm->slots_lock);
2443                 r = kvm_s390_get_cmma_bits(kvm, &args);
2444                 mutex_unlock(&kvm->slots_lock);
2445                 if (!r) {
2446                         r = copy_to_user(argp, &args, sizeof(args));
2447                         if (r)
2448                                 r = -EFAULT;
2449                 }
2450                 break;
2451         }
2452         case KVM_S390_SET_CMMA_BITS: {
2453                 struct kvm_s390_cmma_log args;
2454
2455                 r = -EFAULT;
2456                 if (copy_from_user(&args, argp, sizeof(args)))
2457                         break;
2458                 mutex_lock(&kvm->slots_lock);
2459                 r = kvm_s390_set_cmma_bits(kvm, &args);
2460                 mutex_unlock(&kvm->slots_lock);
2461                 break;
2462         }
2463         case KVM_S390_PV_COMMAND: {
2464                 struct kvm_pv_cmd args;
2465
2466                 /* protected virtualization implies user-controlled sigp */
2467                 kvm->arch.user_cpu_state_ctrl = 1;
2468                 r = 0;
2469                 if (!is_prot_virt_host()) {
2470                         r = -EINVAL;
2471                         break;
2472                 }
2473                 if (copy_from_user(&args, argp, sizeof(args))) {
2474                         r = -EFAULT;
2475                         break;
2476                 }
2477                 if (args.flags) {
2478                         r = -EINVAL;
2479                         break;
2480                 }
2481                 mutex_lock(&kvm->lock);
2482                 r = kvm_s390_handle_pv(kvm, &args);
2483                 mutex_unlock(&kvm->lock);
2484                 if (copy_to_user(argp, &args, sizeof(args))) {
2485                         r = -EFAULT;
2486                         break;
2487                 }
2488                 break;
2489         }
2490         default:
2491                 r = -ENOTTY;
2492         }
2493
2494         return r;
2495 }
2496
2497 static int kvm_s390_apxa_installed(void)
2498 {
2499         struct ap_config_info info;
2500
2501         if (ap_instructions_available()) {
2502                 if (ap_qci(&info) == 0)
2503                         return info.apxa;
2504         }
2505
2506         return 0;
2507 }
2508
2509 /*
2510  * The format of the crypto control block (CRYCB) is specified in the 3 low
2511  * order bits of the CRYCB designation (CRYCBD) field as follows:
2512  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2513  *           AP extended addressing (APXA) facility is installed.
2514  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2515  * Format 2: Both the APXA and MSAX3 facilities are installed.
2516  */
2517 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2518 {
2519         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2520
2521         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2522         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2523
2524         /* Check whether MSAX3 is installed */
2525         if (!test_kvm_facility(kvm, 76))
2526                 return;
2527
2528         if (kvm_s390_apxa_installed())
2529                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2530         else
2531                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2532 }
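/*
 * Worked example (illustrative; the address is made up): for a crycb at
 * 0x7ff81000 on a machine with both MSAX3 and APXA, crycbd ends up as
 * 0x7ff81000 | CRYCB_FORMAT2; with MSAX3 only it would be
 * 0x7ff81000 | CRYCB_FORMAT1, and without facility 76 the low format bits
 * stay zero (format 0).
 */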
2533
2534 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2535                                unsigned long *aqm, unsigned long *adm)
2536 {
2537         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2538
2539         mutex_lock(&kvm->lock);
2540         kvm_s390_vcpu_block_all(kvm);
2541
2542         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2543         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2544                 memcpy(crycb->apcb1.apm, apm, 32);
2545                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2546                          apm[0], apm[1], apm[2], apm[3]);
2547                 memcpy(crycb->apcb1.aqm, aqm, 32);
2548                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2549                          aqm[0], aqm[1], aqm[2], aqm[3]);
2550                 memcpy(crycb->apcb1.adm, adm, 32);
2551                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2552                          adm[0], adm[1], adm[2], adm[3]);
2553                 break;
2554         case CRYCB_FORMAT1:
2555         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2556                 memcpy(crycb->apcb0.apm, apm, 8);
2557                 memcpy(crycb->apcb0.aqm, aqm, 2);
2558                 memcpy(crycb->apcb0.adm, adm, 2);
2559                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2560                          apm[0], *((unsigned short *)aqm),
2561                          *((unsigned short *)adm));
2562                 break;
2563         default:        /* Cannot happen */
2564                 break;
2565         }
2566
2567         /* recreate the shadow crycb for each vcpu */
2568         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2569         kvm_s390_vcpu_unblock_all(kvm);
2570         mutex_unlock(&kvm->lock);
2571 }
2572 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2573
2574 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2575 {
2576         mutex_lock(&kvm->lock);
2577         kvm_s390_vcpu_block_all(kvm);
2578
2579         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2580                sizeof(kvm->arch.crypto.crycb->apcb0));
2581         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2582                sizeof(kvm->arch.crypto.crycb->apcb1));
2583
2584         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2585         /* recreate the shadow crycb for each vcpu */
2586         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2587         kvm_s390_vcpu_unblock_all(kvm);
2588         mutex_unlock(&kvm->lock);
2589 }
2590 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2591
2592 static u64 kvm_s390_get_initial_cpuid(void)
2593 {
2594         struct cpuid cpuid;
2595
2596         get_cpu_id(&cpuid);
2597         cpuid.version = 0xff;
2598         return *((u64 *) &cpuid);
2599 }
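/*
 * Illustrative note (not from the original source): overwriting
 * cpuid.version with 0xff marks the configuration as a KVM guest (a guest
 * executing STIDP then sees version code 0xFF), while the machine type and
 * ident fields keep the values reported by the host.
 */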
2600
2601 static void kvm_s390_crypto_init(struct kvm *kvm)
2602 {
2603         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2604         kvm_s390_set_crycb_format(kvm);
2605
2606         if (!test_kvm_facility(kvm, 76))
2607                 return;
2608
2609         /* Enable AES/DEA protected key functions by default */
2610         kvm->arch.crypto.aes_kw = 1;
2611         kvm->arch.crypto.dea_kw = 1;
2612         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2613                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2614         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2615                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2616 }
2617
2618 static void sca_dispose(struct kvm *kvm)
2619 {
2620         if (kvm->arch.use_esca)
2621                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2622         else
2623                 free_page((unsigned long)(kvm->arch.sca));
2624         kvm->arch.sca = NULL;
2625 }
2626
2627 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2628 {
2629         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2630         int i, rc;
2631         char debug_name[16];
2632         static unsigned long sca_offset;
2633
2634         rc = -EINVAL;
2635 #ifdef CONFIG_KVM_S390_UCONTROL
2636         if (type & ~KVM_VM_S390_UCONTROL)
2637                 goto out_err;
2638         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2639                 goto out_err;
2640 #else
2641         if (type)
2642                 goto out_err;
2643 #endif
2644
2645         rc = s390_enable_sie();
2646         if (rc)
2647                 goto out_err;
2648
2649         rc = -ENOMEM;
2650
2651         if (!sclp.has_64bscao)
2652                 alloc_flags |= GFP_DMA;
2653         rwlock_init(&kvm->arch.sca_lock);
2654         /* start with basic SCA */
2655         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2656         if (!kvm->arch.sca)
2657                 goto out_err;
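        /*
         * Illustrative note (not from the original source): sca_offset
         * staggers the basic SCA of successive VMs within the page in
         * 16-byte steps (0, 16, 32, ...), wrapping back to 0 once another
         * bsca_block would no longer fit, presumably to avoid placing every
         * VM's SCA at the same offset within its page.
         */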
2658         mutex_lock(&kvm_lock);
2659         sca_offset += 16;
2660         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2661                 sca_offset = 0;
2662         kvm->arch.sca = (struct bsca_block *)
2663                         ((char *) kvm->arch.sca + sca_offset);
2664         mutex_unlock(&kvm_lock);
2665
2666         sprintf(debug_name, "kvm-%u", current->pid);
2667
2668         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2669         if (!kvm->arch.dbf)
2670                 goto out_err;
2671
2672         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2673         kvm->arch.sie_page2 =
2674              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2675         if (!kvm->arch.sie_page2)
2676                 goto out_err;
2677
2678         kvm->arch.sie_page2->kvm = kvm;
2679         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2680
2681         for (i = 0; i < kvm_s390_fac_size(); i++) {
2682                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2683                                               (kvm_s390_fac_base[i] |
2684                                                kvm_s390_fac_ext[i]);
2685                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2686                                               kvm_s390_fac_base[i];
2687         }
2688         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2689
2690         /* we are always in czam mode - even on pre z14 machines */
2691         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2692         set_kvm_facility(kvm->arch.model.fac_list, 138);
2693         /* we emulate STHYI in kvm */
2694         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2695         set_kvm_facility(kvm->arch.model.fac_list, 74);
2696         if (MACHINE_HAS_TLB_GUEST) {
2697                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2698                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2699         }
2700
2701         if (css_general_characteristics.aiv && test_facility(65))
2702                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2703
2704         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2705         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2706
2707         kvm_s390_crypto_init(kvm);
2708
2709         mutex_init(&kvm->arch.float_int.ais_lock);
2710         spin_lock_init(&kvm->arch.float_int.lock);
2711         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2712                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2713         init_waitqueue_head(&kvm->arch.ipte_wq);
2714         mutex_init(&kvm->arch.ipte_mutex);
2715
2716         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2717         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2718
2719         if (type & KVM_VM_S390_UCONTROL) {
2720                 kvm->arch.gmap = NULL;
2721                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2722         } else {
2723                 if (sclp.hamax == U64_MAX)
2724                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2725                 else
2726                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2727                                                     sclp.hamax + 1);
2728                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2729                 if (!kvm->arch.gmap)
2730                         goto out_err;
2731                 kvm->arch.gmap->private = kvm;
2732                 kvm->arch.gmap->pfault_enabled = 0;
2733         }
2734
2735         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2736         kvm->arch.use_skf = sclp.has_skey;
2737         spin_lock_init(&kvm->arch.start_stop_lock);
2738         kvm_s390_vsie_init(kvm);
2739         if (use_gisa)
2740                 kvm_s390_gisa_init(kvm);
2741         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2742
2743         return 0;
2744 out_err:
2745         free_page((unsigned long)kvm->arch.sie_page2);
2746         debug_unregister(kvm->arch.dbf);
2747         sca_dispose(kvm);
2748         KVM_EVENT(3, "creation of vm failed: %d", rc);
2749         return rc;
2750 }
2751
2752 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2753 {
2754         u16 rc, rrc;
2755
2756         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2757         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2758         kvm_s390_clear_local_irqs(vcpu);
2759         kvm_clear_async_pf_completion_queue(vcpu);
2760         if (!kvm_is_ucontrol(vcpu->kvm))
2761                 sca_del_vcpu(vcpu);
2762
2763         if (kvm_is_ucontrol(vcpu->kvm))
2764                 gmap_remove(vcpu->arch.gmap);
2765
2766         if (vcpu->kvm->arch.use_cmma)
2767                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2768         /* We cannot hold the vcpu mutex here; we are already dying */
2769         if (kvm_s390_pv_cpu_get_handle(vcpu))
2770                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2771         free_page((unsigned long)(vcpu->arch.sie_block));
2772 }
2773
2774 static void kvm_free_vcpus(struct kvm *kvm)
2775 {
2776         unsigned int i;
2777         struct kvm_vcpu *vcpu;
2778
2779         kvm_for_each_vcpu(i, vcpu, kvm)
2780                 kvm_vcpu_destroy(vcpu);
2781
2782         mutex_lock(&kvm->lock);
2783         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2784                 kvm->vcpus[i] = NULL;
2785
2786         atomic_set(&kvm->online_vcpus, 0);
2787         mutex_unlock(&kvm->lock);
2788 }
2789
2790 void kvm_arch_destroy_vm(struct kvm *kvm)
2791 {
2792         u16 rc, rrc;
2793
2794         kvm_free_vcpus(kvm);
2795         sca_dispose(kvm);
2796         kvm_s390_gisa_destroy(kvm);
2797         /*
2798          * We are already at the end of life and kvm->lock is not taken.
2799          * This is ok as the file descriptor is closed by now and nobody
2800          * can mess with the pv state. To avoid lockdep_assert_held from
2801          * complaining we do not use kvm_s390_pv_is_protected.
2802          */
2803         if (kvm_s390_pv_get_handle(kvm))
2804                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2805         debug_unregister(kvm->arch.dbf);
2806         free_page((unsigned long)kvm->arch.sie_page2);
2807         if (!kvm_is_ucontrol(kvm))
2808                 gmap_remove(kvm->arch.gmap);
2809         kvm_s390_destroy_adapters(kvm);
2810         kvm_s390_clear_float_irqs(kvm);
2811         kvm_s390_vsie_destroy(kvm);
2812         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2813 }
2814
2815 /* Section: vcpu related */
2816 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2817 {
2818         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2819         if (!vcpu->arch.gmap)
2820                 return -ENOMEM;
2821         vcpu->arch.gmap->private = vcpu->kvm;
2822
2823         return 0;
2824 }
2825
2826 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2827 {
2828         if (!kvm_s390_use_sca_entries())
2829                 return;
2830         read_lock(&vcpu->kvm->arch.sca_lock);
2831         if (vcpu->kvm->arch.use_esca) {
2832                 struct esca_block *sca = vcpu->kvm->arch.sca;
2833
2834                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2835                 sca->cpu[vcpu->vcpu_id].sda = 0;
2836         } else {
2837                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2838
2839                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2840                 sca->cpu[vcpu->vcpu_id].sda = 0;
2841         }
2842         read_unlock(&vcpu->kvm->arch.sca_lock);
2843 }
2844
2845 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2846 {
2847         if (!kvm_s390_use_sca_entries()) {
2848                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2849
2850                 /* we still need the basic sca for the ipte control */
2851                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2852                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2853                 return;
2854         }
2855         read_lock(&vcpu->kvm->arch.sca_lock);
2856         if (vcpu->kvm->arch.use_esca) {
2857                 struct esca_block *sca = vcpu->kvm->arch.sca;
2858
2859                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2860                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2861                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2862                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2863                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2864         } else {
2865                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2866
2867                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2868                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2869                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2870                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2871         }
2872         read_unlock(&vcpu->kvm->arch.sca_lock);
2873 }
2874
2875 /* Basic SCA to Extended SCA data copy routines */
2876 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2877 {
2878         d->sda = s->sda;
2879         d->sigp_ctrl.c = s->sigp_ctrl.c;
2880         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2881 }
2882
2883 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2884 {
2885         int i;
2886
2887         d->ipte_control = s->ipte_control;
2888         d->mcn[0] = s->mcn;
2889         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2890                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2891 }
2892
2893 static int sca_switch_to_extended(struct kvm *kvm)
2894 {
2895         struct bsca_block *old_sca = kvm->arch.sca;
2896         struct esca_block *new_sca;
2897         struct kvm_vcpu *vcpu;
2898         unsigned int vcpu_idx;
2899         u32 scaol, scaoh;
2900
2901         if (kvm->arch.use_esca)
2902                 return 0;
2903
2904         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2905         if (!new_sca)
2906                 return -ENOMEM;
2907
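             /*
              * Split the new origin into the scaoh/scaol halves of the SIE
              * block; the ESCA origin must be 64-byte aligned, so the low
              * 6 bits of scaol are masked off.
              */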
2908         scaoh = (u32)((u64)(new_sca) >> 32);
2909         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2910
2911         kvm_s390_vcpu_block_all(kvm);
2912         write_lock(&kvm->arch.sca_lock);
2913
2914         sca_copy_b_to_e(new_sca, old_sca);
2915
2916         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2917                 vcpu->arch.sie_block->scaoh = scaoh;
2918                 vcpu->arch.sie_block->scaol = scaol;
2919                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2920         }
2921         kvm->arch.sca = new_sca;
2922         kvm->arch.use_esca = 1;
2923
2924         write_unlock(&kvm->arch.sca_lock);
2925         kvm_s390_vcpu_unblock_all(kvm);
2926
2927         free_page((unsigned long)old_sca);
2928
2929         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2930                  old_sca, kvm->arch.sca);
2931         return 0;
2932 }
2933
2934 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2935 {
2936         int rc;
2937
2938         if (!kvm_s390_use_sca_entries()) {
2939                 return id < KVM_MAX_VCPUS;
2942         }
2943         if (id < KVM_S390_BSCA_CPU_SLOTS)
2944                 return true;
2945         if (!sclp.has_esca || !sclp.has_64bscao)
2946                 return false;
2947
2948         mutex_lock(&kvm->lock);
2949         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2950         mutex_unlock(&kvm->lock);
2951
2952         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2953 }
2954
2955 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2956 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2957 {
2958         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2959         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2960         vcpu->arch.cputm_start = get_tod_clock_fast();
2961         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2962 }
2963
2964 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2965 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2966 {
2967         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2968         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2969         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2970         vcpu->arch.cputm_start = 0;
2971         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2972 }
2973
2974 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2975 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2976 {
2977         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2978         vcpu->arch.cputm_enabled = true;
2979         __start_cpu_timer_accounting(vcpu);
2980 }
2981
2982 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2983 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2984 {
2985         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2986         __stop_cpu_timer_accounting(vcpu);
2987         vcpu->arch.cputm_enabled = false;
2988 }
2989
2990 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2991 {
2992         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2993         __enable_cpu_timer_accounting(vcpu);
2994         preempt_enable();
2995 }
2996
2997 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2998 {
2999         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3000         __disable_cpu_timer_accounting(vcpu);
3001         preempt_enable();
3002 }
3003
3004 /* set the cpu timer - may only be called from the VCPU thread itself */
3005 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3006 {
3007         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3008         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3009         if (vcpu->arch.cputm_enabled)
3010                 vcpu->arch.cputm_start = get_tod_clock_fast();
3011         vcpu->arch.sie_block->cputm = cputm;
3012         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3013         preempt_enable();
3014 }
3015
3016 /* update and get the cpu timer - can also be called from other VCPU threads */
3017 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3018 {
3019         unsigned int seq;
3020         __u64 value;
3021
3022         if (unlikely(!vcpu->arch.cputm_enabled))
3023                 return vcpu->arch.sie_block->cputm;
3024
3025         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3026         do {
3027                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3028                 /*
3029                  * If the writer would ever execute a read in the critical
3030                  * section, e.g. in irq context, we have a deadlock.
3031                  */
3032                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3033                 value = vcpu->arch.sie_block->cputm;
3034                 /* if cputm_start is 0, accounting is being started/stopped */
3035                 if (likely(vcpu->arch.cputm_start))
3036                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3037         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3038         preempt_enable();
3039         return value;
3040 }
3041
3042 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3043 {
3045         gmap_enable(vcpu->arch.enabled_gmap);
3046         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3047         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3048                 __start_cpu_timer_accounting(vcpu);
3049         vcpu->cpu = cpu;
3050 }
3051
3052 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3053 {
3054         vcpu->cpu = -1;
3055         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3056                 __stop_cpu_timer_accounting(vcpu);
3057         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3058         vcpu->arch.enabled_gmap = gmap_get_enabled();
3059         gmap_disable(vcpu->arch.enabled_gmap);
3061 }
3062
3063 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3064 {
3065         mutex_lock(&vcpu->kvm->lock);
3066         preempt_disable();
3067         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3068         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3069         preempt_enable();
3070         mutex_unlock(&vcpu->kvm->lock);
3071         if (!kvm_is_ucontrol(vcpu->kvm)) {
3072                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3073                 sca_add_vcpu(vcpu);
3074         }
3075         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3076                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3077         /* make vcpu_load load the right gmap on the first trigger */
3078         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3079 }
3080
3081 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3082 {
3083         return test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3084                test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo);
3087 }
3088
3089 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3090 {
3091         /* At least one ECC subfunction must be present */
3092         return kvm_has_pckmo_subfunc(kvm, 32) ||
3093                kvm_has_pckmo_subfunc(kvm, 33) ||
3094                kvm_has_pckmo_subfunc(kvm, 34) ||
3095                kvm_has_pckmo_subfunc(kvm, 40) ||
3096                kvm_has_pckmo_subfunc(kvm, 41);
3098 }
3099
3100 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3101 {
3102         /*
3103          * If the AP instructions are not being interpreted and the MSAX3
3104          * facility is not configured for the guest, there is nothing to set up.
3105          */
3106         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3107                 return;
3108
3109         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3110         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3111         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3112         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3113
3114         if (vcpu->kvm->arch.crypto.apie)
3115                 vcpu->arch.sie_block->eca |= ECA_APIE;
3116
3117         /* Set up protected key support */
3118         if (vcpu->kvm->arch.crypto.aes_kw) {
3119                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3120                 /* ECC is also wrapped with the AES key */
3121                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3122                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3123         }
3124
3125         if (vcpu->kvm->arch.crypto.dea_kw)
3126                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3127 }
3128
3129 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3130 {
3131         free_page(vcpu->arch.sie_block->cbrlo);
3132         vcpu->arch.sie_block->cbrlo = 0;
3133 }
3134
3135 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3136 {
3137         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3138         if (!vcpu->arch.sie_block->cbrlo)
3139                 return -ENOMEM;
3140         return 0;
3141 }
3142
3143 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3144 {
3145         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3146
3147         vcpu->arch.sie_block->ibc = model->ibc;
3148         if (test_kvm_facility(vcpu->kvm, 7))
3149                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3150 }
3151
3152 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3153 {
3154         int rc = 0;
3155         u16 uvrc, uvrrc;
3156
3157         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3158                                                     CPUSTAT_SM |
3159                                                     CPUSTAT_STOPPED);
3160
3161         if (test_kvm_facility(vcpu->kvm, 78))
3162                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3163         else if (test_kvm_facility(vcpu->kvm, 8))
3164                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3165
3166         kvm_s390_vcpu_setup_model(vcpu);
3167
3168         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3169         if (MACHINE_HAS_ESOP)
3170                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3171         if (test_kvm_facility(vcpu->kvm, 9))
3172                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3173         if (test_kvm_facility(vcpu->kvm, 73))
3174                 vcpu->arch.sie_block->ecb |= ECB_TE;
3175
3176         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3177                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3178         if (test_kvm_facility(vcpu->kvm, 130))
3179                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3180         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3181         if (sclp.has_cei)
3182                 vcpu->arch.sie_block->eca |= ECA_CEI;
3183         if (sclp.has_ib)
3184                 vcpu->arch.sie_block->eca |= ECA_IB;
3185         if (sclp.has_siif)
3186                 vcpu->arch.sie_block->eca |= ECA_SII;
3187         if (sclp.has_sigpif)
3188                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3189         if (test_kvm_facility(vcpu->kvm, 129)) {
3190                 vcpu->arch.sie_block->eca |= ECA_VX;
3191                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3192         }
3193         if (test_kvm_facility(vcpu->kvm, 139))
3194                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3195         if (test_kvm_facility(vcpu->kvm, 156))
3196                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
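             /*
              * A non-zero gd means a GISA is in use; let SIE deliver
              * adapter interrupts through it (adapter-interruption
              * virtualization).
              */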
3197         if (vcpu->arch.sie_block->gd) {
3198                 vcpu->arch.sie_block->eca |= ECA_AIV;
3199                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3200                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3201         }
3202         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3203                                         | SDNXC;
3204         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3205
3206         if (sclp.has_kss)
3207                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3208         else
3209                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3210
3211         if (vcpu->kvm->arch.use_cmma) {
3212                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3213                 if (rc)
3214                         return rc;
3215         }
3216         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3217         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3218
3219         vcpu->arch.sie_block->hpid = HPID_KVM;
3220
3221         kvm_s390_vcpu_crypto_setup(vcpu);
3222
3223         mutex_lock(&vcpu->kvm->lock);
3224         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3225                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3226                 if (rc)
3227                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3228         }
3229         mutex_unlock(&vcpu->kvm->lock);
3230
3231         return rc;
3232 }
3233
3234 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3235 {
3236         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3237                 return -EINVAL;
3238         return 0;
3239 }
3240
3241 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3242 {
3243         struct sie_page *sie_page;
3244         int rc;
3245
3246         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3247         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3248         if (!sie_page)
3249                 return -ENOMEM;
3250
3251         vcpu->arch.sie_block = &sie_page->sie_block;
3252         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3253
3254         /* the real guest size will always be smaller than msl */
3255         vcpu->arch.sie_block->mso = 0;
3256         vcpu->arch.sie_block->msl = sclp.hamax;
3257
3258         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3259         spin_lock_init(&vcpu->arch.local_int.lock);
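             /* the GISA origin, if the VM has one, doubles as the GISA designation (gd) */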
3260         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3261         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3262                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3263         seqcount_init(&vcpu->arch.cputm_seqcount);
3264
3265         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3266         kvm_clear_async_pf_completion_queue(vcpu);
3267         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3268                                     KVM_SYNC_GPRS |
3269                                     KVM_SYNC_ACRS |
3270                                     KVM_SYNC_CRS |
3271                                     KVM_SYNC_ARCH0 |
3272                                     KVM_SYNC_PFAULT |
3273                                     KVM_SYNC_DIAG318;
3274         kvm_s390_set_prefix(vcpu, 0);
3275         if (test_kvm_facility(vcpu->kvm, 64))
3276                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3277         if (test_kvm_facility(vcpu->kvm, 82))
3278                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3279         if (test_kvm_facility(vcpu->kvm, 133))
3280                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3281         if (test_kvm_facility(vcpu->kvm, 156))
3282                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3283         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3284          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3285          */
3286         if (MACHINE_HAS_VX)
3287                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3288         else
3289                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3290
3291         if (kvm_is_ucontrol(vcpu->kvm)) {
3292                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3293                 if (rc)
3294                         goto out_free_sie_block;
3295         }
3296
3297         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3298                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3299         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3300
3301         rc = kvm_s390_vcpu_setup(vcpu);
3302         if (rc)
3303                 goto out_ucontrol_uninit;
3304         return 0;
3305
3306 out_ucontrol_uninit:
3307         if (kvm_is_ucontrol(vcpu->kvm))
3308                 gmap_remove(vcpu->arch.gmap);
3309 out_free_sie_block:
3310         free_page((unsigned long)(vcpu->arch.sie_block));
3311         return rc;
3312 }
3313
3314 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3315 {
3316         return kvm_s390_vcpu_has_irq(vcpu, 0);
3317 }
3318
3319 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3320 {
3321         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3322 }
3323
3324 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3325 {
3326         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3327         exit_sie(vcpu);
3328 }
3329
3330 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3331 {
3332         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3333 }
3334
3335 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3336 {
3337         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3338         exit_sie(vcpu);
3339 }
3340
3341 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3342 {
3343         return atomic_read(&vcpu->arch.sie_block->prog20) &
3344                (PROG_BLOCK_SIE | PROG_REQUEST);
3345 }
3346
3347 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3348 {
3349         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3350 }
3351
3352 /*
3353  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3354  * If the CPU is not running (e.g. waiting as idle) the function will
3355  * return immediately.
 */
3356 void exit_sie(struct kvm_vcpu *vcpu)
3357 {
3358         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3359         kvm_s390_vsie_kick(vcpu);
3360         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3361                 cpu_relax();
3362 }
3363
3364 /* Kick a guest cpu out of SIE to process a request synchronously */
3365 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3366 {
3367         kvm_make_request(req, vcpu);
3368         kvm_s390_vcpu_request(vcpu);
3369 }
3370
3371 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3372                               unsigned long end)
3373 {
3374         struct kvm *kvm = gmap->private;
3375         struct kvm_vcpu *vcpu;
3376         unsigned long prefix;
3377         int i;
3378
3379         if (gmap_is_shadow(gmap))
3380                 return;
3381         if (start >= 1UL << 31)
3382                 /* We are only interested in prefix pages */
3383                 return;
3384         kvm_for_each_vcpu(i, vcpu, kvm) {
3385                 /* match against both prefix pages */
3386                 prefix = kvm_s390_get_prefix(vcpu);
3387                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3388                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3389                                    start, end);
3390                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3391                 }
3392         }
3393 }
3394
3395 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3396 {
3397         /* do not poll with more than halt_poll_max_steal percent of steal time */
3398         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3399             halt_poll_max_steal) {
3400                 vcpu->stat.halt_no_poll_steal++;
3401                 return true;
3402         }
3403         return false;
3404 }
3405
3406 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3407 {
3408         /* kvm common code refers to this, but never calls it */
3409         BUG();
3410         return 0;
3411 }
3412
3413 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3414                                            struct kvm_one_reg *reg)
3415 {
3416         int r = -EINVAL;
3417
3418         switch (reg->id) {
3419         case KVM_REG_S390_TODPR:
3420                 r = put_user(vcpu->arch.sie_block->todpr,
3421                              (u32 __user *)reg->addr);
3422                 break;
3423         case KVM_REG_S390_EPOCHDIFF:
3424                 r = put_user(vcpu->arch.sie_block->epoch,
3425                              (u64 __user *)reg->addr);
3426                 break;
3427         case KVM_REG_S390_CPU_TIMER:
3428                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3429                              (u64 __user *)reg->addr);
3430                 break;
3431         case KVM_REG_S390_CLOCK_COMP:
3432                 r = put_user(vcpu->arch.sie_block->ckc,
3433                              (u64 __user *)reg->addr);
3434                 break;
3435         case KVM_REG_S390_PFTOKEN:
3436                 r = put_user(vcpu->arch.pfault_token,
3437                              (u64 __user *)reg->addr);
3438                 break;
3439         case KVM_REG_S390_PFCOMPARE:
3440                 r = put_user(vcpu->arch.pfault_compare,
3441                              (u64 __user *)reg->addr);
3442                 break;
3443         case KVM_REG_S390_PFSELECT:
3444                 r = put_user(vcpu->arch.pfault_select,
3445                              (u64 __user *)reg->addr);
3446                 break;
3447         case KVM_REG_S390_PP:
3448                 r = put_user(vcpu->arch.sie_block->pp,
3449                              (u64 __user *)reg->addr);
3450                 break;
3451         case KVM_REG_S390_GBEA:
3452                 r = put_user(vcpu->arch.sie_block->gbea,
3453                              (u64 __user *)reg->addr);
3454                 break;
3455         default:
3456                 break;
3457         }
3458
3459         return r;
3460 }
3461
3462 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3463                                            struct kvm_one_reg *reg)
3464 {
3465         int r = -EINVAL;
3466         __u64 val;
3467
3468         switch (reg->id) {
3469         case KVM_REG_S390_TODPR:
3470                 r = get_user(vcpu->arch.sie_block->todpr,
3471                              (u32 __user *)reg->addr);
3472                 break;
3473         case KVM_REG_S390_EPOCHDIFF:
3474                 r = get_user(vcpu->arch.sie_block->epoch,
3475                              (u64 __user *)reg->addr);
3476                 break;
3477         case KVM_REG_S390_CPU_TIMER:
3478                 r = get_user(val, (u64 __user *)reg->addr);
3479                 if (!r)
3480                         kvm_s390_set_cpu_timer(vcpu, val);
3481                 break;
3482         case KVM_REG_S390_CLOCK_COMP:
3483                 r = get_user(vcpu->arch.sie_block->ckc,
3484                              (u64 __user *)reg->addr);
3485                 break;
3486         case KVM_REG_S390_PFTOKEN:
3487                 r = get_user(vcpu->arch.pfault_token,
3488                              (u64 __user *)reg->addr);
3489                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3490                         kvm_clear_async_pf_completion_queue(vcpu);
3491                 break;
3492         case KVM_REG_S390_PFCOMPARE:
3493                 r = get_user(vcpu->arch.pfault_compare,
3494                              (u64 __user *)reg->addr);
3495                 break;
3496         case KVM_REG_S390_PFSELECT:
3497                 r = get_user(vcpu->arch.pfault_select,
3498                              (u64 __user *)reg->addr);
3499                 break;
3500         case KVM_REG_S390_PP:
3501                 r = get_user(vcpu->arch.sie_block->pp,
3502                              (u64 __user *)reg->addr);
3503                 break;
3504         case KVM_REG_S390_GBEA:
3505                 r = get_user(vcpu->arch.sie_block->gbea,
3506                              (u64 __user *)reg->addr);
3507                 break;
3508         default:
3509                 break;
3510         }
3511
3512         return r;
3513 }
3514
3515 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3516 {
3517         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3518         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3519         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3520
3521         kvm_clear_async_pf_completion_queue(vcpu);
3522         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3523                 kvm_s390_vcpu_stop(vcpu);
3524         kvm_s390_clear_local_irqs(vcpu);
3525 }
3526
3527 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3528 {
3529         /* Initial reset is a superset of the normal reset */
3530         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3531
3532         /*
3533          * This equals the initial cpu reset defined in the POP, but we don't
3534          * switch to ESA mode. We not only reset the internal data, but also ...
3535          */
3536         vcpu->arch.sie_block->gpsw.mask = 0;
3537         vcpu->arch.sie_block->gpsw.addr = 0;
3538         kvm_s390_set_prefix(vcpu, 0);
3539         kvm_s390_set_cpu_timer(vcpu, 0);
3540         vcpu->arch.sie_block->ckc = 0;
3541         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3542         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3543         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3544
3545         /* ... the data in sync regs */
3546         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3547         vcpu->run->s.regs.ckc = 0;
3548         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3549         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3550         vcpu->run->psw_addr = 0;
3551         vcpu->run->psw_mask = 0;
3552         vcpu->run->s.regs.todpr = 0;
3553         vcpu->run->s.regs.cputm = 0;
3554         vcpu->run->s.regs.ckc = 0;
3555         vcpu->run->s.regs.pp = 0;
3556         vcpu->run->s.regs.gbea = 1;
3557         vcpu->run->s.regs.fpc = 0;
3558         /*
3559          * Do not reset these registers in the protected case, as some of
3560          * them are overlaid and they are not accessible in this case
3561          * anyway.
3562          */
3563         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3564                 vcpu->arch.sie_block->gbea = 1;
3565                 vcpu->arch.sie_block->pp = 0;
3566                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3567                 vcpu->arch.sie_block->todpr = 0;
3568         }
3569 }
3570
3571 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3572 {
3573         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3574
3575         /* Clear reset is a superset of the initial reset */
3576         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3577
3578         memset(&regs->gprs, 0, sizeof(regs->gprs));
3579         memset(&regs->vrs, 0, sizeof(regs->vrs));
3580         memset(&regs->acrs, 0, sizeof(regs->acrs));
3581         memset(&regs->gscb, 0, sizeof(regs->gscb));
3582
3583         regs->etoken = 0;
3584         regs->etoken_extension = 0;
3585 }
3586
3587 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3588 {
3589         vcpu_load(vcpu);
3590         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3591         vcpu_put(vcpu);
3592         return 0;
3593 }
3594
3595 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3596 {
3597         vcpu_load(vcpu);
3598         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3599         vcpu_put(vcpu);
3600         return 0;
3601 }
3602
3603 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3604                                   struct kvm_sregs *sregs)
3605 {
3606         vcpu_load(vcpu);
3607
3608         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3609         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3610
3611         vcpu_put(vcpu);
3612         return 0;
3613 }
3614
3615 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3616                                   struct kvm_sregs *sregs)
3617 {
3618         vcpu_load(vcpu);
3619
3620         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3621         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3622
3623         vcpu_put(vcpu);
3624         return 0;
3625 }
3626
3627 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3628 {
3629         int ret = 0;
3630
3631         vcpu_load(vcpu);
3632
3633         if (test_fp_ctl(fpu->fpc)) {
3634                 ret = -EINVAL;
3635                 goto out;
3636         }
3637         vcpu->run->s.regs.fpc = fpu->fpc;
3638         if (MACHINE_HAS_VX)
3639                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3640                                  (freg_t *) fpu->fprs);
3641         else
3642                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3643
3644 out:
3645         vcpu_put(vcpu);
3646         return ret;
3647 }
3648
3649 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3650 {
3651         vcpu_load(vcpu);
3652
3653         /* make sure we have the latest values */
3654         save_fpu_regs();
3655         if (MACHINE_HAS_VX)
3656                 convert_vx_to_fp((freg_t *) fpu->fprs,
3657                                  (__vector128 *) vcpu->run->s.regs.vrs);
3658         else
3659                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3660         fpu->fpc = vcpu->run->s.regs.fpc;
3661
3662         vcpu_put(vcpu);
3663         return 0;
3664 }
3665
3666 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3667 {
3668         int rc = 0;
3669
3670         if (!is_vcpu_stopped(vcpu)) {
3671                 rc = -EBUSY;
3672         } else {
3673                 vcpu->run->psw_mask = psw.mask;
3674                 vcpu->run->psw_addr = psw.addr;
3675         }
3676         return rc;
3677 }
3678
3679 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3680                                   struct kvm_translation *tr)
3681 {
3682         return -EINVAL; /* not implemented yet */
3683 }
3684
3685 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3686                               KVM_GUESTDBG_USE_HW_BP | \
3687                               KVM_GUESTDBG_ENABLE)
3688
3689 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3690                                         struct kvm_guest_debug *dbg)
3691 {
3692         int rc = 0;
3693
3694         vcpu_load(vcpu);
3695
3696         vcpu->guest_debug = 0;
3697         kvm_s390_clear_bp_data(vcpu);
3698
3699         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3700                 rc = -EINVAL;
3701                 goto out;
3702         }
3703         if (!sclp.has_gpere) {
3704                 rc = -EINVAL;
3705                 goto out;
3706         }
3707
3708         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3709                 vcpu->guest_debug = dbg->control;
3710                 /* enforce guest PER */
3711                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3712
3713                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3714                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3715         } else {
3716                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3717                 vcpu->arch.guestdbg.last_bp = 0;
3718         }
3719
3720         if (rc) {
3721                 vcpu->guest_debug = 0;
3722                 kvm_s390_clear_bp_data(vcpu);
3723                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3724         }
3725
3726 out:
3727         vcpu_put(vcpu);
3728         return rc;
3729 }
3730
3731 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3732                                     struct kvm_mp_state *mp_state)
3733 {
3734         int ret;
3735
3736         vcpu_load(vcpu);
3737
3738         /* CHECK_STOP and LOAD are not supported yet */
3739         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3740                                       KVM_MP_STATE_OPERATING;
3741
3742         vcpu_put(vcpu);
3743         return ret;
3744 }
3745
3746 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3747                                     struct kvm_mp_state *mp_state)
3748 {
3749         int rc = 0;
3750
3751         vcpu_load(vcpu);
3752
3753         /* user space knows about this interface - let it control the state */
3754         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3755
3756         switch (mp_state->mp_state) {
3757         case KVM_MP_STATE_STOPPED:
3758                 rc = kvm_s390_vcpu_stop(vcpu);
3759                 break;
3760         case KVM_MP_STATE_OPERATING:
3761                 rc = kvm_s390_vcpu_start(vcpu);
3762                 break;
3763         case KVM_MP_STATE_LOAD:
3764                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3765                         rc = -ENXIO;
3766                         break;
3767                 }
3768                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3769                 break;
3770         case KVM_MP_STATE_CHECK_STOP:
3771                 fallthrough;    /* CHECK_STOP is not supported yet */
3772         default:
3773                 rc = -ENXIO;
3774         }
3775
3776         vcpu_put(vcpu);
3777         return rc;
3778 }
3779
3780 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3781 {
3782         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3783 }
3784
3785 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3786 {
3787 retry:
3788         kvm_s390_vcpu_request_handled(vcpu);
3789         if (!kvm_request_pending(vcpu))
3790                 return 0;
3791         /*
3792          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3793          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3794          * This ensures that the ipte instruction for this request has
3795          * already finished. We might race against a second unmapper that
3796          * wants to set the blocking bit. Let's just retry the request loop.
3797          */
3798         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3799                 int rc;
3800                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3801                                           kvm_s390_get_prefix(vcpu),
3802                                           PAGE_SIZE * 2, PROT_WRITE);
3803                 if (rc) {
3804                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3805                         return rc;
3806                 }
3807                 goto retry;
3808         }
3809
3810         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3811                 vcpu->arch.sie_block->ihcpu = 0xffff;
3812                 goto retry;
3813         }
3814
3815         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3816                 if (!ibs_enabled(vcpu)) {
3817                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3818                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3819                 }
3820                 goto retry;
3821         }
3822
3823         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3824                 if (ibs_enabled(vcpu)) {
3825                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3826                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3827                 }
3828                 goto retry;
3829         }
3830
3831         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3832                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3833                 goto retry;
3834         }
3835
3836         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3837                 /*
3838                  * Disable CMM virtualization; we will emulate the ESSA
3839                  * instruction manually, in order to provide additional
3840                  * functionalities needed for live migration.
3841                  */
3842                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3843                 goto retry;
3844         }
3845
3846         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3847                 /*
3848                  * Re-enable CMM virtualization if CMMA is available and
3849                  * CMM has been used.
3850                  */
3851                 if (vcpu->kvm->arch.use_cmma &&
3852                     vcpu->kvm->mm->context.uses_cmm)
3853                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3854                 goto retry;
3855         }
3856
3857         /* nothing to do, just clear the request */
3858         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3859         /* we left the vsie handler, nothing to do, just clear the request */
3860         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3861
3862         return 0;
3863 }
3864
3865 void kvm_s390_set_tod_clock(struct kvm *kvm,
3866                             const struct kvm_s390_vm_tod_clock *gtod)
3867 {
3868         struct kvm_vcpu *vcpu;
3869         struct kvm_s390_tod_clock_ext htod;
3870         int i;
3871
3872         mutex_lock(&kvm->lock);
3873         preempt_disable();
3874
3875         get_tod_clock_ext((char *)&htod);
3876
3877         kvm->arch.epoch = gtod->tod - htod.tod;
3878         kvm->arch.epdx = 0;
3879         if (test_kvm_facility(kvm, 139)) {
3880                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
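                     /*
                      * If the epoch calculation above wrapped below zero,
                      * borrow one from the epoch index.
                      */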
3881                 if (kvm->arch.epoch > gtod->tod)
3882                         kvm->arch.epdx -= 1;
3883         }
3884
3885         kvm_s390_vcpu_block_all(kvm);
3886         kvm_for_each_vcpu(i, vcpu, kvm) {
3887                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3888                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3889         }
3890
3891         kvm_s390_vcpu_unblock_all(kvm);
3892         preempt_enable();
3893         mutex_unlock(&kvm->lock);
3894 }
3895
3896 /**
3897  * kvm_arch_fault_in_page - fault-in guest page if necessary
3898  * @vcpu: The corresponding virtual cpu
3899  * @gpa: Guest physical address
3900  * @writable: Whether the page should be writable or not
3901  *
3902  * Make sure that a guest page has been faulted-in on the host.
3903  *
3904  * Return: Zero on success, negative error code otherwise.
3905  */
3906 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3907 {
3908         return gmap_fault(vcpu->arch.gmap, gpa,
3909                           writable ? FAULT_FLAG_WRITE : 0);
3910 }
3911
3912 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3913                                       unsigned long token)
3914 {
3915         struct kvm_s390_interrupt inti;
3916         struct kvm_s390_irq irq;
3917
3918         if (start_token) {
3919                 irq.u.ext.ext_params2 = token;
3920                 irq.type = KVM_S390_INT_PFAULT_INIT;
3921                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3922         } else {
3923                 inti.type = KVM_S390_INT_PFAULT_DONE;
3924                 inti.parm64 = token;
3925                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3926         }
3927 }
3928
3929 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3930                                      struct kvm_async_pf *work)
3931 {
3932         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3933         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3934
3935         return true;
3936 }
3937
3938 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3939                                  struct kvm_async_pf *work)
3940 {
3941         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3942         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3943 }
3944
3945 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3946                                struct kvm_async_pf *work)
3947 {
3948         /* s390 will always inject the page directly */
3949 }
3950
3951 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3952 {
3953         /*
3954          * s390 will always inject the page directly,
3955          * but we still want kvm_check_async_pf_completion() to clean up
3956          */
3957         return true;
3958 }
3959
3960 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3961 {
3962         hva_t hva;
3963         struct kvm_arch_async_pf arch;
3964
3965         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3966                 return false;
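             /*
              * The guest only wants pfault handling while its PSW matches
              * the select/compare masks set up in the pfault handshake.
              */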
3967         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3968             vcpu->arch.pfault_compare)
3969                 return false;
3970         if (psw_extint_disabled(vcpu))
3971                 return false;
3972         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3973                 return false;
3974         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3975                 return false;
3976         if (!vcpu->arch.gmap->pfault_enabled)
3977                 return false;
3978
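             /* translate the faulting guest address into a host virtual address */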
3979         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3980         hva += current->thread.gmap_addr & ~PAGE_MASK;
3981         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3982                 return false;
3983
3984         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3985 }
3986
3987 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3988 {
3989         int rc, cpuflags;
3990
3991         /*
3992          * On s390 notifications for arriving pages will be delivered directly
3993          * to the guest but the housekeeping for completed pfaults is
3994          * handled outside the worker.
3995          */
3996         kvm_check_async_pf_completion(vcpu);
3997
3998         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3999         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4000
4001         if (need_resched())
4002                 schedule();
4003
4004         if (!kvm_is_ucontrol(vcpu->kvm)) {
4005                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4006                 if (rc)
4007                         return rc;
4008         }
4009
4010         rc = kvm_s390_handle_requests(vcpu);
4011         if (rc)
4012                 return rc;
4013
4014         if (guestdbg_enabled(vcpu)) {
4015                 kvm_s390_backup_guest_per_regs(vcpu);
4016                 kvm_s390_patch_guest_per_regs(vcpu);
4017         }
4018
4019         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4020
4021         vcpu->arch.sie_block->icptcode = 0;
4022         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4023         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4024         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4025
4026         return 0;
4027 }
4028
4029 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4030 {
4031         struct kvm_s390_pgm_info pgm_info = {
4032                 .code = PGM_ADDRESSING,
4033         };
4034         u8 opcode, ilen;
4035         int rc;
4036
4037         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4038         trace_kvm_s390_sie_fault(vcpu);
4039
4040         /*
4041          * We want to inject an addressing exception, which is defined as a
4042          * suppressing or terminating exception. However, since we came here
4043          * by a DAT access exception, the PSW still points to the faulting
4044          * instruction since DAT exceptions are nullifying. So we've got
4045          * to look up the current opcode to get the length of the instruction
4046          * to be able to forward the PSW.
4047          */
4048         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4049         ilen = insn_length(opcode);
4050         if (rc < 0) {
4051                 return rc;
4052         } else if (rc) {
4053                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4054                  * Forward by arbitrary ilc, injection will take care of
4055                  * nullification if necessary.
4056                  */
4057                 pgm_info = vcpu->arch.pgm;
4058                 ilen = 4;
4059         }
4060         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4061         kvm_s390_forward_psw(vcpu, ilen);
4062         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4063 }
4064
4065 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4066 {
4067         struct mcck_volatile_info *mcck_info;
4068         struct sie_page *sie_page;
4069
4070         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4071                    vcpu->arch.sie_block->icptcode);
4072         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4073
4074         if (guestdbg_enabled(vcpu))
4075                 kvm_s390_restore_guest_per_regs(vcpu);
4076
4077         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4078         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4079
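             /*
              * sie64a reports -EINTR when SIE was left because of a host
              * machine check; the details were stored in the sie_page by
              * the low-level handler.
              */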
4080         if (exit_reason == -EINTR) {
4081                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4082                 sie_page = container_of(vcpu->arch.sie_block,
4083                                         struct sie_page, sie_block);
4084                 mcck_info = &sie_page->mcck_info;
4085                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4086                 return 0;
4087         }
4088
4089         if (vcpu->arch.sie_block->icptcode > 0) {
4090                 int rc = kvm_handle_sie_intercept(vcpu);
4091
4092                 if (rc != -EOPNOTSUPP)
4093                         return rc;
4094                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4095                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4096                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4097                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4098                 return -EREMOTE;
4099         } else if (exit_reason != -EFAULT) {
4100                 vcpu->stat.exit_null++;
4101                 return 0;
4102         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4103                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4104                 vcpu->run->s390_ucontrol.trans_exc_code =
4105                                                 current->thread.gmap_addr;
4106                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4107                 return -EREMOTE;
4108         } else if (current->thread.gmap_pfault) {
4109                 trace_kvm_s390_major_guest_pfault(vcpu);
4110                 current->thread.gmap_pfault = 0;
4111                 if (kvm_arch_setup_async_pf(vcpu))
4112                         return 0;
4113                 vcpu->stat.pfault_sync++;
4114                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4115         }
4116         return vcpu_post_run_fault_in_sie(vcpu);
4117 }
4118
4119 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4120 static int __vcpu_run(struct kvm_vcpu *vcpu)
4121 {
4122         int rc, exit_reason;
4123         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4124
4125         /*
4126          * We try to hold kvm->srcu during most of vcpu_run (except when
4127          * running the guest), so that memslots (and other stuff) are protected.
4128          */
4129         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4130
4131         do {
4132                 rc = vcpu_pre_run(vcpu);
4133                 if (rc)
4134                         break;
4135
4136                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4137                 /*
4138                  * As PF_VCPU will be used in the fault handler, there must
4139                  * be no uaccess between guest_enter and guest_exit.
4140                  */
4141                 local_irq_disable();
4142                 guest_enter_irqoff();
4143                 __disable_cpu_timer_accounting(vcpu);
4144                 local_irq_enable();
4145                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4146                         memcpy(sie_page->pv_grregs,
4147                                vcpu->run->s.regs.gprs,
4148                                sizeof(sie_page->pv_grregs));
4149                 }
4150                 exit_reason = sie64a(vcpu->arch.sie_block,
4151                                      vcpu->run->s.regs.gprs);
4152                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4153                         memcpy(vcpu->run->s.regs.gprs,
4154                                sie_page->pv_grregs,
4155                                sizeof(sie_page->pv_grregs));
4156                         /*
4157                          * We're not allowed to inject interrupts on intercepts
4158                          * that leave the guest state in an "in-between" state
4159                          * where the next SIE entry will do a continuation.
4160                          * Fence interrupts in our "internal" PSW.
4161                          */
4162                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4163                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4164                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4165                         }
4166                 }
4167                 local_irq_disable();
4168                 __enable_cpu_timer_accounting(vcpu);
4169                 guest_exit_irqoff();
4170                 local_irq_enable();
4171                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4172
4173                 rc = vcpu_post_run(vcpu, exit_reason);
4174         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4175
4176         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4177         return rc;
4178 }
4179
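/*
 * Sync format-2 (i.e. non-protected guest) state from kvm_run into the
 * SIE control block. Runtime instrumentation and guarded storage are
 * enabled here, instead of lazily, when userspace hands in valid control
 * blocks; on machines with GS the guest's GS control block is loaded.
 */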
4180 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4181 {
4182         struct kvm_run *kvm_run = vcpu->run;
4183         struct runtime_instr_cb *riccb;
4184         struct gs_cb *gscb;
4185
4186         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4187         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4188         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4189         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4190         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4191                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4192                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4193                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4194         }
4195         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4196                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4197                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4198                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4199                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4200                         kvm_clear_async_pf_completion_queue(vcpu);
4201         }
4202         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4203                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4204                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4205         }
4206         /*
4207          * If userspace sets the riccb (e.g. after migration) to a valid state,
4208          * we should enable RI here instead of doing the lazy enablement.
4209          */
4210         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4211             test_kvm_facility(vcpu->kvm, 64) &&
4212             riccb->v &&
4213             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4214                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4215                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4216         }
4217         /*
4218          * If userspace sets the gscb (e.g. after migration) to non-zero,
4219          * we should enable GS here instead of doing the lazy enablement.
4220          */
4221         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4222             test_kvm_facility(vcpu->kvm, 133) &&
4223             gscb->gssm &&
4224             !vcpu->arch.gs_enabled) {
4225                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4226                 vcpu->arch.sie_block->ecb |= ECB_GS;
4227                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4228                 vcpu->arch.gs_enabled = 1;
4229         }
4230         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4231             test_kvm_facility(vcpu->kvm, 82)) {
4232                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4233                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4234         }
4235         if (MACHINE_HAS_GS) {
4236                 preempt_disable();
4237                 __ctl_set_bit(2, 4);
4238                 if (current->thread.gs_cb) {
4239                         vcpu->arch.host_gscb = current->thread.gs_cb;
4240                         save_gs_cb(vcpu->arch.host_gscb);
4241                 }
4242                 if (vcpu->arch.gs_enabled) {
4243                         current->thread.gs_cb = (struct gs_cb *)
4244                                                 &vcpu->run->s.regs.gscb;
4245                         restore_gs_cb(current->thread.gs_cb);
4246                 }
4247                 preempt_enable();
4248         }
4249         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4250 }
4251
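/*
 * Sync register state from kvm_run into the vcpu before entering SIE.
 * Format-2 only state is skipped for protected guests; there, merely the
 * condition code of the PSW mask is accepted from userspace in order to
 * avoid validity intercepts.
 */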
4252 static void sync_regs(struct kvm_vcpu *vcpu)
4253 {
4254         struct kvm_run *kvm_run = vcpu->run;
4255
4256         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4257                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4258         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4259                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4260                 /* some control register changes require a tlb flush */
4261                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4262         }
4263         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4264                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4265                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4266         }
4267         save_access_regs(vcpu->arch.host_acrs);
4268         restore_access_regs(vcpu->run->s.regs.acrs);
4269         /* save host (userspace) fprs/vrs */
4270         save_fpu_regs();
4271         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4272         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4273         if (MACHINE_HAS_VX)
4274                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4275         else
4276                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4277         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4278         if (test_fp_ctl(current->thread.fpu.fpc))
4279                 /* User space provided an invalid FPC, let's clear it */
4280                 current->thread.fpu.fpc = 0;
4281
4282         /* Sync fmt2 only data */
4283         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4284                 sync_regs_fmt2(vcpu);
4285         } else {
4286                 /*
4287                  * In several places we have to modify our internal view to
4288                  * not do things that are disallowed by the ultravisor. For
4289                  * example we must not inject interrupts after specific exits
4290                  * (e.g. 112 prefix page not secure). We do this by turning
4291                  * off the machine check, external and I/O interrupt bits
4292                  * of our PSW copy. To avoid getting validity intercepts, we
4293                  * only accept the condition code from userspace.
4294                  */
4295                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4296                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4297                                                    PSW_MASK_CC;
4298         }
4299
4300         kvm_run->kvm_dirty_regs = 0;
4301 }
4302
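/*
 * Store format-2 (i.e. non-protected guest) state from the SIE control
 * block back into kvm_run and restore the host's guarded storage control
 * block on machines that have GS.
 */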
4303 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4304 {
4305         struct kvm_run *kvm_run = vcpu->run;
4306
4307         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4308         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4309         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4310         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4311         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4312         if (MACHINE_HAS_GS) {
4313                 __ctl_set_bit(2, 4);
4314                 if (vcpu->arch.gs_enabled)
4315                         save_gs_cb(current->thread.gs_cb);
4316                 preempt_disable();
4317                 current->thread.gs_cb = vcpu->arch.host_gscb;
4318                 restore_gs_cb(vcpu->arch.host_gscb);
4319                 preempt_enable();
4320                 if (!vcpu->arch.host_gscb)
4321                         __ctl_clear_bit(2, 4);
4322                 vcpu->arch.host_gscb = NULL;
4323         }
4324         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4325 }
4326
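/*
 * Store register state from the vcpu back into kvm_run after leaving SIE.
 * The host's access registers are restored here, while the host FPU state
 * is only restored lazily on return to userspace.
 */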
4327 static void store_regs(struct kvm_vcpu *vcpu)
4328 {
4329         struct kvm_run *kvm_run = vcpu->run;
4330
4331         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4332         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4333         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4334         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4335         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4336         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4337         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4338         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4339         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4340         save_access_regs(vcpu->run->s.regs.acrs);
4341         restore_access_regs(vcpu->arch.host_acrs);
4342         /* Save guest register state */
4343         save_fpu_regs();
4344         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4345         /* Restore will be done lazily at return */
4346         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4347         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4348         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4349                 store_regs_fmt2(vcpu);
4350 }
4351
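/*
 * The KVM_RUN ioctl: validate the synced register state, run the guest,
 * and write the resulting state back to kvm_run.
 */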
4352 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4353 {
4354         struct kvm_run *kvm_run = vcpu->run;
4355         int rc;
4356
4357         if (kvm_run->immediate_exit)
4358                 return -EINTR;
4359
4360         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4361             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4362                 return -EINVAL;
4363
4364         vcpu_load(vcpu);
4365
4366         if (guestdbg_exit_pending(vcpu)) {
4367                 kvm_s390_prepare_debug_exit(vcpu);
4368                 rc = 0;
4369                 goto out;
4370         }
4371
4372         kvm_sigset_activate(vcpu);
4373
4374         /*
4375          * No need to check the return value of vcpu_start: it can only fail
4376          * for protvirt, but protvirt implies user controlled cpu state.
4377          */
4378         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4379                 kvm_s390_vcpu_start(vcpu);
4380         } else if (is_vcpu_stopped(vcpu)) {
4381                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4382                                    vcpu->vcpu_id);
4383                 rc = -EINVAL;
4384                 goto out;
4385         }
4386
4387         sync_regs(vcpu);
4388         enable_cpu_timer_accounting(vcpu);
4389
4390         might_fault();
4391         rc = __vcpu_run(vcpu);
4392
4393         if (signal_pending(current) && !rc) {
4394                 kvm_run->exit_reason = KVM_EXIT_INTR;
4395                 rc = -EINTR;
4396         }
4397
4398         if (guestdbg_exit_pending(vcpu) && !rc)  {
4399                 kvm_s390_prepare_debug_exit(vcpu);
4400                 rc = 0;
4401         }
4402
4403         if (rc == -EREMOTE) {
4404                 /* userspace support is needed, kvm_run has been prepared */
4405                 rc = 0;
4406         }
4407
4408         disable_cpu_timer_accounting(vcpu);
4409         store_regs(vcpu);
4410
4411         kvm_sigset_deactivate(vcpu);
4412
4413         vcpu->stat.exit_userspace++;
4414 out:
4415         vcpu_put(vcpu);
4416         return rc;
4417 }
4418
4419 /*
4420  * store status at address
4421  * we have two special cases:
4422  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4423  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4424  */
4425 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4426 {
4427         unsigned char archmode = 1;
4428         freg_t fprs[NUM_FPRS];
4429         unsigned int px;
4430         u64 clkcomp, cputm;
4431         int rc;
4432
4433         px = kvm_s390_get_prefix(vcpu);
4434         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4435                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4436                         return -EFAULT;
4437                 gpa = 0;
4438         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4439                 if (write_guest_real(vcpu, 163, &archmode, 1))
4440                         return -EFAULT;
4441                 gpa = px;
4442         } else
4443                 gpa -= __LC_FPREGS_SAVE_AREA;
4444
4445         /* manually convert vector registers if necessary */
4446         if (MACHINE_HAS_VX) {
4447                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4448                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4449                                      fprs, 128);
4450         } else {
4451                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4452                                      vcpu->run->s.regs.fprs, 128);
4453         }
4454         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4455                               vcpu->run->s.regs.gprs, 128);
4456         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4457                               &vcpu->arch.sie_block->gpsw, 16);
4458         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4459                               &px, 4);
4460         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4461                               &vcpu->run->s.regs.fpc, 4);
4462         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4463                               &vcpu->arch.sie_block->todpr, 4);
4464         cputm = kvm_s390_get_cpu_timer(vcpu);
4465         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4466                               &cputm, 8);
4467         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4468         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4469                               &clkcomp, 8);
4470         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4471                               &vcpu->run->s.regs.acrs, 64);
4472         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4473                               &vcpu->arch.sie_block->gcr, 128);
4474         return rc ? -EFAULT : 0;
4475 }
4476
4477 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4478 {
4479         /*
4480          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4481          * switch in the run ioctl. Let's update our copies before we save
4482          * it into the save area
4483          */
4484         save_fpu_regs();
4485         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4486         save_access_regs(vcpu->run->s.regs.acrs);
4487
4488         return kvm_s390_store_status_unloaded(vcpu, addr);
4489 }
4490
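/*
 * The IBS facility speeds up a guest that has exactly one running VCPU.
 * It is therefore enabled while only a single VCPU is started and disabled
 * again as soon as a second one starts, using KVM_REQ_ENABLE_IBS and
 * KVM_REQ_DISABLE_IBS sync requests.
 */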
4491 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4492 {
4493         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4494         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4495 }
4496
4497 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4498 {
4499         unsigned int i;
4500         struct kvm_vcpu *vcpu;
4501
4502         kvm_for_each_vcpu(i, vcpu, kvm) {
4503                 __disable_ibs_on_vcpu(vcpu);
4504         }
4505 }
4506
4507 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4508 {
4509         if (!sclp.has_ibs)
4510                 return;
4511         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4512         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4513 }
4514
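/*
 * Bring a vcpu out of the STOPPED state. For protected guests the
 * ultravisor is informed about the state change; IBS is toggled depending
 * on how many VCPUs are already running.
 */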
4515 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4516 {
4517         int i, online_vcpus, r = 0, started_vcpus = 0;
4518
4519         if (!is_vcpu_stopped(vcpu))
4520                 return 0;
4521
4522         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4523         /* Only one cpu at a time may enter/leave the STOPPED state. */
4524         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4525         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4526
4527         /* Let's tell the UV that we want to change into the operating state */
4528         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4529                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4530                 if (r) {
4531                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4532                         return r;
4533                 }
4534         }
4535
4536         for (i = 0; i < online_vcpus; i++) {
4537                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4538                         started_vcpus++;
4539         }
4540
4541         if (started_vcpus == 0) {
4542                 /* we're the only active VCPU -> speed it up */
4543                 __enable_ibs_on_vcpu(vcpu);
4544         } else if (started_vcpus == 1) {
4545                 /*
4546                  * As we are starting a second VCPU, we have to disable
4547                  * the IBS facility on all VCPUs to remove potentially
4548                  * outstanding ENABLE requests.
4549                  */
4550                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4551         }
4552
4553         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4554         /*
4555          * The real PSW might have changed due to a RESTART interpreted by the
4556          * ultravisor. We block all interrupts and let the next sie exit
4557          * refresh our view.
4558          */
4559         if (kvm_s390_pv_cpu_is_protected(vcpu))
4560                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4561         /*
4562          * Another VCPU might have used IBS while we were offline.
4563          * Let's play safe and flush the VCPU at startup.
4564          */
4565         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4566         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4567         return 0;
4568 }
4569
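/*
 * Put a vcpu into the STOPPED state, the counterpart of
 * kvm_s390_vcpu_start(). If exactly one started VCPU remains afterwards,
 * IBS is enabled for it.
 */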
4570 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4571 {
4572         int i, online_vcpus, r = 0, started_vcpus = 0;
4573         struct kvm_vcpu *started_vcpu = NULL;
4574
4575         if (is_vcpu_stopped(vcpu))
4576                 return 0;
4577
4578         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4579         /* Only one cpu at a time may enter/leave the STOPPED state. */
4580         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4581         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4582
4583         /* Let's tell the UV that we want to change into the stopped state */
4584         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4585                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4586                 if (r) {
4587                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4588                         return r;
4589                 }
4590         }
4591
4592         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4593         kvm_s390_clear_stop_irq(vcpu);
4594
4595         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4596         __disable_ibs_on_vcpu(vcpu);
4597
4598         for (i = 0; i < online_vcpus; i++) {
4599                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4600                         started_vcpus++;
4601                         started_vcpu = vcpu->kvm->vcpus[i];
4602                 }
4603         }
4604
4605         if (started_vcpus == 1) {
4606                 /*
4607                  * As we only have one VCPU left, we want to enable the
4608                  * IBS facility for that VCPU to speed it up.
4609                  */
4610                 __enable_ibs_on_vcpu(started_vcpu);
4611         }
4612
4613         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4614         return 0;
4615 }
4616
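/* Handle KVM_ENABLE_CAP for per-vcpu capabilities; only CSS support so far. */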
4617 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4618                                      struct kvm_enable_cap *cap)
4619 {
4620         int r;
4621
4622         if (cap->flags)
4623                 return -EINVAL;
4624
4625         switch (cap->cap) {
4626         case KVM_CAP_S390_CSS_SUPPORT:
4627                 if (!vcpu->kvm->arch.css_support) {
4628                         vcpu->kvm->arch.css_support = 1;
4629                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4630                         trace_kvm_s390_enable_css(vcpu->kvm);
4631                 }
4632                 r = 0;
4633                 break;
4634         default:
4635                 r = -EINVAL;
4636                 break;
4637         }
4638         return r;
4639 }
4640
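/*
 * Handle KVM_S390_MEMOP_SIDA_{READ,WRITE}: copy data between userspace
 * and the sida (secure instruction data area) of a protected vcpu, after
 * checking size and offset against the sida size.
 */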
4641 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4642                                    struct kvm_s390_mem_op *mop)
4643 {
4644         void __user *uaddr = (void __user *)mop->buf;
4645         int r = 0;
4646
4647         if (mop->flags || !mop->size)
4648                 return -EINVAL;
4649         if (mop->size + mop->sida_offset < mop->size)
4650                 return -EINVAL;
4651         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4652                 return -E2BIG;
4653
4654         switch (mop->op) {
4655         case KVM_S390_MEMOP_SIDA_READ:
4656                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4657                                  mop->sida_offset), mop->size))
4658                         r = -EFAULT;
4659
4660                 break;
4661         case KVM_S390_MEMOP_SIDA_WRITE:
4662                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4663                                    mop->sida_offset), uaddr, mop->size))
4664                         r = -EFAULT;
4665                 break;
4666         }
4667         return r;
4668 }
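
/*
 * Handle KVM_S390_MEMOP_LOGICAL_{READ,WRITE}: access guest memory through
 * a logical address. KVM_S390_MEMOP_F_CHECK_ONLY only checks whether the
 * access would be possible, KVM_S390_MEMOP_F_INJECT_EXCEPTION injects a
 * resulting program exception into the guest. Not available for protected
 * vcpus.
 */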
4669 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4670                                   struct kvm_s390_mem_op *mop)
4671 {
4672         void __user *uaddr = (void __user *)mop->buf;
4673         void *tmpbuf = NULL;
4674         int r = 0;
4675         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4676                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4677
4678         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4679                 return -EINVAL;
4680
4681         if (mop->size > MEM_OP_MAX_SIZE)
4682                 return -E2BIG;
4683
4684         if (kvm_s390_pv_cpu_is_protected(vcpu))
4685                 return -EINVAL;
4686
4687         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4688                 tmpbuf = vmalloc(mop->size);
4689                 if (!tmpbuf)
4690                         return -ENOMEM;
4691         }
4692
4693         switch (mop->op) {
4694         case KVM_S390_MEMOP_LOGICAL_READ:
4695                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4696                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4697                                             mop->size, GACC_FETCH);
4698                         break;
4699                 }
4700                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4701                 if (r == 0) {
4702                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4703                                 r = -EFAULT;
4704                 }
4705                 break;
4706         case KVM_S390_MEMOP_LOGICAL_WRITE:
4707                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4708                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4709                                             mop->size, GACC_STORE);
4710                         break;
4711                 }
4712                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4713                         r = -EFAULT;
4714                         break;
4715                 }
4716                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4717                 break;
4718         }
4719
4720         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4721                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4722
4723         vfree(tmpbuf);
4724         return r;
4725 }
4726
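/*
 * Dispatch a KVM_S390_MEM_OP to guest memory or sida access, holding
 * kvm->srcu around the operation.
 */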
4727 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4728                                       struct kvm_s390_mem_op *mop)
4729 {
4730         int r, srcu_idx;
4731
4732         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4733
4734         switch (mop->op) {
4735         case KVM_S390_MEMOP_LOGICAL_READ:
4736         case KVM_S390_MEMOP_LOGICAL_WRITE:
4737                 r = kvm_s390_guest_mem_op(vcpu, mop);
4738                 break;
4739         case KVM_S390_MEMOP_SIDA_READ:
4740         case KVM_S390_MEMOP_SIDA_WRITE:
4741                 /* we are locked against sida going away by the vcpu->mutex */
4742                 r = kvm_s390_guest_sida_op(vcpu, mop);
4743                 break;
4744         default:
4745                 r = -EINVAL;
4746         }
4747
4748         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4749         return r;
4750 }
4751
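/*
 * Interrupt injection (KVM_S390_IRQ, KVM_S390_INTERRUPT) is handled here,
 * asynchronously and without taking the vcpu mutex, so that interrupts can
 * be injected into a vcpu that is currently running.
 */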
4752 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4753                                unsigned int ioctl, unsigned long arg)
4754 {
4755         struct kvm_vcpu *vcpu = filp->private_data;
4756         void __user *argp = (void __user *)arg;
4757
4758         switch (ioctl) {
4759         case KVM_S390_IRQ: {
4760                 struct kvm_s390_irq s390irq;
4761
4762                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4763                         return -EFAULT;
4764                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4765         }
4766         case KVM_S390_INTERRUPT: {
4767                 struct kvm_s390_interrupt s390int;
4768                 struct kvm_s390_irq s390irq = {};
4769
4770                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4771                         return -EFAULT;
4772                 if (s390int_to_s390irq(&s390int, &s390irq))
4773                         return -EINVAL;
4774                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4775         }
4776         }
4777         return -ENOIOCTLCMD;
4778 }
4779
4780 long kvm_arch_vcpu_ioctl(struct file *filp,
4781                          unsigned int ioctl, unsigned long arg)
4782 {
4783         struct kvm_vcpu *vcpu = filp->private_data;
4784         void __user *argp = (void __user *)arg;
4785         int idx;
4786         long r;
4787         u16 rc, rrc;
4788
4789         vcpu_load(vcpu);
4790
4791         switch (ioctl) {
4792         case KVM_S390_STORE_STATUS:
4793                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4794                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4795                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4796                 break;
4797         case KVM_S390_SET_INITIAL_PSW: {
4798                 psw_t psw;
4799
4800                 r = -EFAULT;
4801                 if (copy_from_user(&psw, argp, sizeof(psw)))
4802                         break;
4803                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4804                 break;
4805         }
4806         case KVM_S390_CLEAR_RESET:
4807                 r = 0;
4808                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4809                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4810                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4811                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4812                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4813                                    rc, rrc);
4814                 }
4815                 break;
4816         case KVM_S390_INITIAL_RESET:
4817                 r = 0;
4818                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4819                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4820                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4821                                           UVC_CMD_CPU_RESET_INITIAL,
4822                                           &rc, &rrc);
4823                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4824                                    rc, rrc);
4825                 }
4826                 break;
4827         case KVM_S390_NORMAL_RESET:
4828                 r = 0;
4829                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4830                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4831                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4832                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4833                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4834                                    rc, rrc);
4835                 }
4836                 break;
4837         case KVM_SET_ONE_REG:
4838         case KVM_GET_ONE_REG: {
4839                 struct kvm_one_reg reg;
4840                 r = -EINVAL;
4841                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4842                         break;
4843                 r = -EFAULT;
4844                 if (copy_from_user(&reg, argp, sizeof(reg)))
4845                         break;
4846                 if (ioctl == KVM_SET_ONE_REG)
4847                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4848                 else
4849                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4850                 break;
4851         }
4852 #ifdef CONFIG_KVM_S390_UCONTROL
4853         case KVM_S390_UCAS_MAP: {
4854                 struct kvm_s390_ucas_mapping ucasmap;
4855
4856                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4857                         r = -EFAULT;
4858                         break;
4859                 }
4860
4861                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4862                         r = -EINVAL;
4863                         break;
4864                 }
4865
4866                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4867                                      ucasmap.vcpu_addr, ucasmap.length);
4868                 break;
4869         }
4870         case KVM_S390_UCAS_UNMAP: {
4871                 struct kvm_s390_ucas_mapping ucasmap;
4872
4873                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4874                         r = -EFAULT;
4875                         break;
4876                 }
4877
4878                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4879                         r = -EINVAL;
4880                         break;
4881                 }
4882
4883                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4884                         ucasmap.length);
4885                 break;
4886         }
4887 #endif
4888         case KVM_S390_VCPU_FAULT: {
4889                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4890                 break;
4891         }
4892         case KVM_ENABLE_CAP:
4893         {
4894                 struct kvm_enable_cap cap;
4895                 r = -EFAULT;
4896                 if (copy_from_user(&cap, argp, sizeof(cap)))
4897                         break;
4898                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4899                 break;
4900         }
4901         case KVM_S390_MEM_OP: {
4902                 struct kvm_s390_mem_op mem_op;
4903
4904                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4905                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4906                 else
4907                         r = -EFAULT;
4908                 break;
4909         }
4910         case KVM_S390_SET_IRQ_STATE: {
4911                 struct kvm_s390_irq_state irq_state;
4912
4913                 r = -EFAULT;
4914                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4915                         break;
4916                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4917                     irq_state.len == 0 ||
4918                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4919                         r = -EINVAL;
4920                         break;
4921                 }
4922                 /* do not use irq_state.flags, it will break old QEMUs */
4923                 r = kvm_s390_set_irq_state(vcpu,
4924                                            (void __user *) irq_state.buf,
4925                                            irq_state.len);
4926                 break;
4927         }
4928         case KVM_S390_GET_IRQ_STATE: {
4929                 struct kvm_s390_irq_state irq_state;
4930
4931                 r = -EFAULT;
4932                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4933                         break;
4934                 if (irq_state.len == 0) {
4935                         r = -EINVAL;
4936                         break;
4937                 }
4938                 /* do not use irq_state.flags, it will break old QEMUs */
4939                 r = kvm_s390_get_irq_state(vcpu,
4940                                            (__u8 __user *)  irq_state.buf,
4941                                            irq_state.len);
4942                 break;
4943         }
4944         default:
4945                 r = -ENOTTY;
4946         }
4947
4948         vcpu_put(vcpu);
4949         return r;
4950 }
4951
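/*
 * Back the vcpu's mmap at KVM_S390_SIE_PAGE_OFFSET with the SIE control
 * block; this is only supported for ucontrol VMs.
 */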
4952 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4953 {
4954 #ifdef CONFIG_KVM_S390_UCONTROL
4955         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4956                  && (kvm_is_ucontrol(vcpu->kvm))) {
4957                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4958                 get_page(vmf->page);
4959                 return 0;
4960         }
4961 #endif
4962         return VM_FAULT_SIGBUS;
4963 }
4964
4965 /* Section: memory related */
4966 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4967                                    struct kvm_memory_slot *memslot,
4968                                    const struct kvm_userspace_memory_region *mem,
4969                                    enum kvm_mr_change change)
4970 {
4971         /* A few sanity checks. Memory slots have to start and end at a
4972            segment boundary (1 MB). The memory in userland may be fragmented
4973            into various different vmas, and it is fine to mmap() and munmap()
4974            this slot's memory at any time after this call. */
4975
4976         if (mem->userspace_addr & 0xffffful)
4977                 return -EINVAL;
4978
4979         if (mem->memory_size & 0xffffful)
4980                 return -EINVAL;
4981
4982         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4983                 return -EINVAL;
4984
4985         /* When we are protected, we should not change the memory slots */
4986         if (kvm_s390_pv_get_handle(kvm))
4987                 return -EINVAL;
4988         return 0;
4989 }
4990
4991 void kvm_arch_commit_memory_region(struct kvm *kvm,
4992                                 const struct kvm_userspace_memory_region *mem,
4993                                 struct kvm_memory_slot *old,
4994                                 const struct kvm_memory_slot *new,
4995                                 enum kvm_mr_change change)
4996 {
4997         int rc = 0;
4998
4999         switch (change) {
5000         case KVM_MR_DELETE:
5001                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5002                                         old->npages * PAGE_SIZE);
5003                 break;
5004         case KVM_MR_MOVE:
5005                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5006                                         old->npages * PAGE_SIZE);
5007                 if (rc)
5008                         break;
5009                 fallthrough;
5010         case KVM_MR_CREATE:
5011                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5012                                       mem->guest_phys_addr, mem->memory_size);
5013                 break;
5014         case KVM_MR_FLAGS_ONLY:
5015                 break;
5016         default:
5017                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5018         }
5019         if (rc)
5020                 pr_warn("failed to commit memory region\n");
5021         return;
5022 }
5023
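/*
 * Extract the two hmfai bits for facility-list double word @i reported by
 * the SCLP and turn them into a mask of the facility bits in that double
 * word that are not reserved for the hypervisor.
 */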
5024 static inline unsigned long nonhyp_mask(int i)
5025 {
5026         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5027
5028         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5029 }
5030
5031 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5032 {
5033         vcpu->valid_wakeup = false;
5034 }
5035
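/*
 * Module init: fail if the SIE facility is not available, reject the
 * unsupported combination of nesting and huge page backing, and apply the
 * non-hypervisor-managed facility bits to the guest facility base list.
 */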
5036 static int __init kvm_s390_init(void)
5037 {
5038         int i;
5039
5040         if (!sclp.has_sief2) {
5041                 pr_info("SIE is not available\n");
5042                 return -ENODEV;
5043         }
5044
5045         if (nested && hpage) {
5046                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5047                 return -EINVAL;
5048         }
5049
5050         for (i = 0; i < 16; i++)
5051                 kvm_s390_fac_base[i] |=
5052                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5053
5054         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5055 }
5056
5057 static void __exit kvm_s390_exit(void)
5058 {
5059         kvm_exit();
5060 }
5061
5062 module_init(kvm_s390_init);
5063 module_exit(kvm_s390_exit);
5064
5065 /*
5066  * Enable autoloading of the kvm module.
5067  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5068  * since x86 takes a different approach.
5069  */
5070 #include <linux/miscdevice.h>
5071 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5072 MODULE_ALIAS("devname:kvm");