linux-2.6-microblaze.git: arch/s390/kvm/kvm-s390.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
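/*
 * Each entry below maps a debugfs file name to an offset into the per-vcpu
 * or per-vm statistics structure via the VCPU_STAT()/VM_STAT() helpers
 * above; common KVM code exposes them as files, usually under
 * /sys/kernel/debug/kvm/ (the exact path depends on where debugfs is
 * mounted on the host).
 */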
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93         { "deliver_program", VCPU_STAT(deliver_program) },
94         { "deliver_io", VCPU_STAT(deliver_io) },
95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97         { "inject_ckc", VCPU_STAT(inject_ckc) },
98         { "inject_cputm", VCPU_STAT(inject_cputm) },
99         { "inject_external_call", VCPU_STAT(inject_external_call) },
100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102         { "inject_io", VM_STAT(inject_io) },
103         { "inject_mchk", VCPU_STAT(inject_mchk) },
104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105         { "inject_program", VCPU_STAT(inject_program) },
106         { "inject_restart", VCPU_STAT(inject_restart) },
107         { "inject_service_signal", VM_STAT(inject_service_signal) },
108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111         { "inject_virtio", VM_STAT(inject_virtio) },
112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113         { "instruction_gs", VCPU_STAT(instruction_gs) },
114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120         { "instruction_sck", VCPU_STAT(instruction_sck) },
121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122         { "instruction_spx", VCPU_STAT(instruction_spx) },
123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124         { "instruction_stap", VCPU_STAT(instruction_stap) },
125         { "instruction_iske", VCPU_STAT(instruction_iske) },
126         { "instruction_ri", VCPU_STAT(instruction_ri) },
127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128         { "instruction_sske", VCPU_STAT(instruction_sske) },
129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130         { "instruction_essa", VCPU_STAT(instruction_essa) },
131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133         { "instruction_tb", VCPU_STAT(instruction_tb) },
134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138         { "instruction_sie", VCPU_STAT(instruction_sie) },
139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158         { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
159         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
160         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
161         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
162         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
163         { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167         __u8 epoch_idx;
168         __u64 tod;
169         __u8 reserved[7];
170 } __packed;
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
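/*
 * A brief usage note (assuming the usual "kvm" module name on s390): the
 * parameters above can be set at load time or on the kernel command line,
 * e.g. "kvm.nested=1" or "kvm.hpage=1", and halt_poll_max_steal can also be
 * changed at runtime via /sys/module/kvm/parameters/.
 */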
186
187 /*
188  * For now we handle at most 16 double words as this is what the s390 base
189  * kernel handles and stores in the prefix page. If we ever need to go beyond
 190  * this, the code needs to change, but the external uapi can stay.
191  */
192 #define SIZE_INTERNAL 16
193
194 /*
 195  * Base feature mask that defines the default mask for facilities. Consists of the
196  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
197  */
198 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
199 /*
200  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
201  * and defines the facilities that can be enabled via a cpu model.
202  */
203 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
204
205 static unsigned long kvm_s390_fac_size(void)
206 {
207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
208         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
209         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
210                 sizeof(S390_lowcore.stfle_fac_list));
211
212         return SIZE_INTERNAL;
213 }
214
215 /* available cpu features supported by kvm */
216 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
217 /* available subfunctions indicated via query / "test bit" */
218 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
219
220 static struct gmap_notifier gmap_notifier;
221 static struct gmap_notifier vsie_gmap_notifier;
222 debug_info_t *kvm_s390_dbf;
223
224 /* Section: not file related */
225 int kvm_arch_hardware_enable(void)
226 {
227         /* every s390 is virtualization enabled ;-) */
228         return 0;
229 }
230
231 int kvm_arch_check_processor_compat(void)
232 {
233         return 0;
234 }
235
236 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
237                               unsigned long end);
238
239 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
240 {
241         u8 delta_idx = 0;
242
243         /*
 244          * The TOD jumps by delta; we have to compensate by adding
245          * -delta to the epoch.
246          */
247         delta = -delta;
248
249         /* sign-extension - we're adding to signed values below */
250         if ((s64)delta < 0)
251                 delta_idx = -1;
252
253         scb->epoch += delta;
254         if (scb->ecd & ECD_MEF) {
255                 scb->epdx += delta_idx;
256                 if (scb->epoch < delta)
257                         scb->epdx += 1;
258         }
259 }
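/*
 * Worked example of the arithmetic above (illustrative values): treat
 * (epdx:epoch) as one wide value. For a host TOD jump of delta = 0x100 the
 * code adds 0xffffffffffffff00 (= -0x100) to the epoch and -1 to the epoch
 * index, then adds the carry of the 64-bit addition back to the index.
 * If the old epoch was 0x200, the addition carries (the new epoch 0x100 is
 * below the addend), so the index gets -1 + 1 = 0 and only the epoch moves.
 * If the old epoch was 0x50, there is no carry and the index is decremented,
 * i.e. the borrow propagates into the epoch index.
 */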
260
261 /*
262  * This callback is executed during stop_machine(). All CPUs are therefore
263  * temporarily stopped. In order not to change guest behavior, we have to
264  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
265  * so a CPU won't be stopped while calculating with the epoch.
266  */
267 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
268                           void *v)
269 {
270         struct kvm *kvm;
271         struct kvm_vcpu *vcpu;
272         int i;
273         unsigned long long *delta = v;
274
275         list_for_each_entry(kvm, &vm_list, vm_list) {
276                 kvm_for_each_vcpu(i, vcpu, kvm) {
277                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
278                         if (i == 0) {
279                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
280                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
281                         }
282                         if (vcpu->arch.cputm_enabled)
283                                 vcpu->arch.cputm_start += *delta;
284                         if (vcpu->arch.vsie_block)
285                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
286                                                    *delta);
287                 }
288         }
289         return NOTIFY_OK;
290 }
291
292 static struct notifier_block kvm_clock_notifier = {
293         .notifier_call = kvm_clock_sync,
294 };
295
296 int kvm_arch_hardware_setup(void)
297 {
298         gmap_notifier.notifier_call = kvm_gmap_notifier;
299         gmap_register_pte_notifier(&gmap_notifier);
300         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
301         gmap_register_pte_notifier(&vsie_gmap_notifier);
302         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
303                                        &kvm_clock_notifier);
304         return 0;
305 }
306
307 void kvm_arch_hardware_unsetup(void)
308 {
309         gmap_unregister_pte_notifier(&gmap_notifier);
310         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
311         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
312                                          &kvm_clock_notifier);
313 }
314
315 static void allow_cpu_feat(unsigned long nr)
316 {
317         set_bit_inv(nr, kvm_s390_available_cpu_feat);
318 }
319
320 static inline int plo_test_bit(unsigned char nr)
321 {
322         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
323         int cc;
324
325         asm volatile(
326                 /* Parameter registers are ignored for "test bit" */
327                 "       plo     0,0,0,0(0)\n"
328                 "       ipm     %0\n"
329                 "       srl     %0,28\n"
330                 : "=d" (cc)
331                 : "d" (r0)
332                 : "cc");
333         return cc == 0;
334 }
335
336 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
337 {
338         register unsigned long r0 asm("0") = 0; /* query function */
339         register unsigned long r1 asm("1") = (unsigned long) query;
340
341         asm volatile(
342                 /* Parameter regs are ignored */
343                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
344                 :
345                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
346                 : "cc", "memory");
347 }
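/*
 * Both helpers above probe which subfunctions of an instruction the CPU
 * implements: plo_test_bit() uses the PLO "test bit" interface (bit 0x100
 * set in GR0) and reports cc == 0 when function code nr is installed, while
 * __insn32_query() executes the query function (GR0 == 0) of the given
 * opcode and stores the resulting subfunction bitmap at *query. The results
 * are collected into kvm_s390_available_subfunc by kvm_s390_cpu_feat_init().
 */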
348
349 #define INSN_SORTL 0xb938
350 #define INSN_DFLTCC 0xb939
351
352 static void kvm_s390_cpu_feat_init(void)
353 {
354         int i;
355
356         for (i = 0; i < 256; ++i) {
357                 if (plo_test_bit(i))
358                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
359         }
360
361         if (test_facility(28)) /* TOD-clock steering */
362                 ptff(kvm_s390_available_subfunc.ptff,
363                      sizeof(kvm_s390_available_subfunc.ptff),
364                      PTFF_QAF);
365
366         if (test_facility(17)) { /* MSA */
367                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
368                               kvm_s390_available_subfunc.kmac);
369                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
370                               kvm_s390_available_subfunc.kmc);
371                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
372                               kvm_s390_available_subfunc.km);
373                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
374                               kvm_s390_available_subfunc.kimd);
375                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
376                               kvm_s390_available_subfunc.klmd);
377         }
378         if (test_facility(76)) /* MSA3 */
379                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
380                               kvm_s390_available_subfunc.pckmo);
381         if (test_facility(77)) { /* MSA4 */
382                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
383                               kvm_s390_available_subfunc.kmctr);
384                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
385                               kvm_s390_available_subfunc.kmf);
386                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
387                               kvm_s390_available_subfunc.kmo);
388                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
389                               kvm_s390_available_subfunc.pcc);
390         }
391         if (test_facility(57)) /* MSA5 */
392                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
393                               kvm_s390_available_subfunc.ppno);
394
395         if (test_facility(146)) /* MSA8 */
396                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
397                               kvm_s390_available_subfunc.kma);
398
399         if (test_facility(155)) /* MSA9 */
400                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
401                               kvm_s390_available_subfunc.kdsa);
402
403         if (test_facility(150)) /* SORTL */
404                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
405
406         if (test_facility(151)) /* DFLTCC */
407                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
408
409         if (MACHINE_HAS_ESOP)
410                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
411         /*
412          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
413          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
414          */
415         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
416             !test_facility(3) || !nested)
417                 return;
418         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
419         if (sclp.has_64bscao)
420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
421         if (sclp.has_siif)
422                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
423         if (sclp.has_gpere)
424                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
425         if (sclp.has_gsls)
426                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
427         if (sclp.has_ib)
428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
429         if (sclp.has_cei)
430                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
431         if (sclp.has_ibs)
432                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
433         if (sclp.has_kss)
434                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
435         /*
436          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
437          * all skey handling functions read/set the skey from the PGSTE
438          * instead of the real storage key.
439          *
 440          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
 441          * resident pages to be detected as preserved.
442          *
443          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
444          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
445          *
446          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
447          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
448          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
449          *
450          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
451          * cannot easily shadow the SCA because of the ipte lock.
452          */
453 }
454
455 int kvm_arch_init(void *opaque)
456 {
457         int rc = -ENOMEM;
458
459         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
460         if (!kvm_s390_dbf)
461                 return -ENOMEM;
462
463         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
464                 goto out;
465
466         kvm_s390_cpu_feat_init();
467
468         /* Register floating interrupt controller interface. */
469         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
470         if (rc) {
471                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
472                 goto out;
473         }
474
475         rc = kvm_s390_gib_init(GAL_ISC);
476         if (rc)
477                 goto out;
478
479         return 0;
480
481 out:
482         kvm_arch_exit();
483         return rc;
484 }
485
486 void kvm_arch_exit(void)
487 {
488         kvm_s390_gib_destroy();
489         debug_unregister(kvm_s390_dbf);
490 }
491
492 /* Section: device related */
493 long kvm_arch_dev_ioctl(struct file *filp,
494                         unsigned int ioctl, unsigned long arg)
495 {
496         if (ioctl == KVM_S390_ENABLE_SIE)
497                 return s390_enable_sie();
498         return -EINVAL;
499 }
500
501 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
502 {
503         int r;
504
505         switch (ext) {
506         case KVM_CAP_S390_PSW:
507         case KVM_CAP_S390_GMAP:
508         case KVM_CAP_SYNC_MMU:
509 #ifdef CONFIG_KVM_S390_UCONTROL
510         case KVM_CAP_S390_UCONTROL:
511 #endif
512         case KVM_CAP_ASYNC_PF:
513         case KVM_CAP_SYNC_REGS:
514         case KVM_CAP_ONE_REG:
515         case KVM_CAP_ENABLE_CAP:
516         case KVM_CAP_S390_CSS_SUPPORT:
517         case KVM_CAP_IOEVENTFD:
518         case KVM_CAP_DEVICE_CTRL:
519         case KVM_CAP_S390_IRQCHIP:
520         case KVM_CAP_VM_ATTRIBUTES:
521         case KVM_CAP_MP_STATE:
522         case KVM_CAP_IMMEDIATE_EXIT:
523         case KVM_CAP_S390_INJECT_IRQ:
524         case KVM_CAP_S390_USER_SIGP:
525         case KVM_CAP_S390_USER_STSI:
526         case KVM_CAP_S390_SKEYS:
527         case KVM_CAP_S390_IRQ_STATE:
528         case KVM_CAP_S390_USER_INSTR0:
529         case KVM_CAP_S390_CMMA_MIGRATION:
530         case KVM_CAP_S390_AIS:
531         case KVM_CAP_S390_AIS_MIGRATION:
532         case KVM_CAP_S390_VCPU_RESETS:
533                 r = 1;
534                 break;
535         case KVM_CAP_S390_HPAGE_1M:
536                 r = 0;
537                 if (hpage && !kvm_is_ucontrol(kvm))
538                         r = 1;
539                 break;
540         case KVM_CAP_S390_MEM_OP:
541                 r = MEM_OP_MAX_SIZE;
542                 break;
543         case KVM_CAP_NR_VCPUS:
544         case KVM_CAP_MAX_VCPUS:
545         case KVM_CAP_MAX_VCPU_ID:
546                 r = KVM_S390_BSCA_CPU_SLOTS;
547                 if (!kvm_s390_use_sca_entries())
548                         r = KVM_MAX_VCPUS;
549                 else if (sclp.has_esca && sclp.has_64bscao)
550                         r = KVM_S390_ESCA_CPU_SLOTS;
551                 break;
552         case KVM_CAP_S390_COW:
553                 r = MACHINE_HAS_ESOP;
554                 break;
555         case KVM_CAP_S390_VECTOR_REGISTERS:
556                 r = MACHINE_HAS_VX;
557                 break;
558         case KVM_CAP_S390_RI:
559                 r = test_facility(64);
560                 break;
561         case KVM_CAP_S390_GS:
562                 r = test_facility(133);
563                 break;
564         case KVM_CAP_S390_BPB:
565                 r = test_facility(82);
566                 break;
567         default:
568                 r = 0;
569         }
570         return r;
571 }
572
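/*
 * Transfer the dirty state that the gmap tracks at segment (pmd) granularity
 * into the generic KVM dirty bitmap of the memslot. The slot is walked in
 * chunks of _PAGE_ENTRIES pages (one segment), and every page reported dirty
 * by gmap_sync_dirty_log_pmd() is marked via mark_page_dirty().
 */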
573 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
574                                     struct kvm_memory_slot *memslot)
575 {
576         int i;
577         gfn_t cur_gfn, last_gfn;
578         unsigned long gaddr, vmaddr;
579         struct gmap *gmap = kvm->arch.gmap;
580         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
581
582         /* Loop over all guest segments */
583         cur_gfn = memslot->base_gfn;
584         last_gfn = memslot->base_gfn + memslot->npages;
585         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
586                 gaddr = gfn_to_gpa(cur_gfn);
587                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
588                 if (kvm_is_error_hva(vmaddr))
589                         continue;
590
591                 bitmap_zero(bitmap, _PAGE_ENTRIES);
592                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
593                 for (i = 0; i < _PAGE_ENTRIES; i++) {
594                         if (test_bit(i, bitmap))
595                                 mark_page_dirty(kvm, cur_gfn + i);
596                 }
597
598                 if (fatal_signal_pending(current))
599                         return;
600                 cond_resched();
601         }
602 }
603
604 /* Section: vm related */
605 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
606
607 /*
608  * Get (and clear) the dirty memory log for a memory slot.
609  */
610 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
611                                struct kvm_dirty_log *log)
612 {
613         int r;
614         unsigned long n;
615         struct kvm_memslots *slots;
616         struct kvm_memory_slot *memslot;
617         int is_dirty = 0;
618
619         if (kvm_is_ucontrol(kvm))
620                 return -EINVAL;
621
622         mutex_lock(&kvm->slots_lock);
623
624         r = -EINVAL;
625         if (log->slot >= KVM_USER_MEM_SLOTS)
626                 goto out;
627
628         slots = kvm_memslots(kvm);
629         memslot = id_to_memslot(slots, log->slot);
630         r = -ENOENT;
631         if (!memslot->dirty_bitmap)
632                 goto out;
633
634         kvm_s390_sync_dirty_log(kvm, memslot);
635         r = kvm_get_dirty_log(kvm, log, &is_dirty);
636         if (r)
637                 goto out;
638
639         /* Clear the dirty log */
640         if (is_dirty) {
641                 n = kvm_dirty_bitmap_bytes(memslot);
642                 memset(memslot->dirty_bitmap, 0, n);
643         }
644         r = 0;
645 out:
646         mutex_unlock(&kvm->slots_lock);
647         return r;
648 }
649
650 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
651 {
652         unsigned int i;
653         struct kvm_vcpu *vcpu;
654
655         kvm_for_each_vcpu(i, vcpu, kvm) {
656                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
657         }
658 }
659
660 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
661 {
662         int r;
663
664         if (cap->flags)
665                 return -EINVAL;
666
667         switch (cap->cap) {
668         case KVM_CAP_S390_IRQCHIP:
669                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
670                 kvm->arch.use_irqchip = 1;
671                 r = 0;
672                 break;
673         case KVM_CAP_S390_USER_SIGP:
674                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
675                 kvm->arch.user_sigp = 1;
676                 r = 0;
677                 break;
678         case KVM_CAP_S390_VECTOR_REGISTERS:
679                 mutex_lock(&kvm->lock);
680                 if (kvm->created_vcpus) {
681                         r = -EBUSY;
682                 } else if (MACHINE_HAS_VX) {
683                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
684                         set_kvm_facility(kvm->arch.model.fac_list, 129);
685                         if (test_facility(134)) {
686                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
687                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
688                         }
689                         if (test_facility(135)) {
690                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
691                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
692                         }
693                         if (test_facility(148)) {
694                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
695                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
696                         }
697                         if (test_facility(152)) {
698                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
699                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
700                         }
701                         r = 0;
702                 } else
703                         r = -EINVAL;
704                 mutex_unlock(&kvm->lock);
705                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
706                          r ? "(not available)" : "(success)");
707                 break;
708         case KVM_CAP_S390_RI:
709                 r = -EINVAL;
710                 mutex_lock(&kvm->lock);
711                 if (kvm->created_vcpus) {
712                         r = -EBUSY;
713                 } else if (test_facility(64)) {
714                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
715                         set_kvm_facility(kvm->arch.model.fac_list, 64);
716                         r = 0;
717                 }
718                 mutex_unlock(&kvm->lock);
719                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
720                          r ? "(not available)" : "(success)");
721                 break;
722         case KVM_CAP_S390_AIS:
723                 mutex_lock(&kvm->lock);
724                 if (kvm->created_vcpus) {
725                         r = -EBUSY;
726                 } else {
727                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
728                         set_kvm_facility(kvm->arch.model.fac_list, 72);
729                         r = 0;
730                 }
731                 mutex_unlock(&kvm->lock);
732                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
733                          r ? "(not available)" : "(success)");
734                 break;
735         case KVM_CAP_S390_GS:
736                 r = -EINVAL;
737                 mutex_lock(&kvm->lock);
738                 if (kvm->created_vcpus) {
739                         r = -EBUSY;
740                 } else if (test_facility(133)) {
741                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
742                         set_kvm_facility(kvm->arch.model.fac_list, 133);
743                         r = 0;
744                 }
745                 mutex_unlock(&kvm->lock);
746                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
747                          r ? "(not available)" : "(success)");
748                 break;
749         case KVM_CAP_S390_HPAGE_1M:
750                 mutex_lock(&kvm->lock);
751                 if (kvm->created_vcpus)
752                         r = -EBUSY;
753                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
754                         r = -EINVAL;
755                 else {
756                         r = 0;
757                         down_write(&kvm->mm->mmap_sem);
758                         kvm->mm->context.allow_gmap_hpage_1m = 1;
759                         up_write(&kvm->mm->mmap_sem);
760                         /*
761                          * We might have to create fake 4k page
 762                          * tables. To keep the hardware from working on
 763                          * stale PGSTEs, we emulate these instructions.
764                          */
765                         kvm->arch.use_skf = 0;
766                         kvm->arch.use_pfmfi = 0;
767                 }
768                 mutex_unlock(&kvm->lock);
769                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
770                          r ? "(not available)" : "(success)");
771                 break;
772         case KVM_CAP_S390_USER_STSI:
773                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
774                 kvm->arch.user_stsi = 1;
775                 r = 0;
776                 break;
777         case KVM_CAP_S390_USER_INSTR0:
778                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
779                 kvm->arch.user_instr0 = 1;
780                 icpt_operexc_on_all_vcpus(kvm);
781                 r = 0;
782                 break;
783         default:
784                 r = -EINVAL;
785                 break;
786         }
787         return r;
788 }
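/*
 * A minimal userspace sketch of how the capabilities above are enabled
 * (vm_fd is assumed to be an already created KVM VM file descriptor;
 * error handling abbreviated):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		err(1, "KVM_ENABLE_CAP");
 *
 * Capabilities that change the CPU model (e.g. KVM_CAP_S390_VECTOR_REGISTERS)
 * must be enabled before the first vcpu is created, as the -EBUSY checks
 * above enforce.
 */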
789
790 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
791 {
792         int ret;
793
794         switch (attr->attr) {
795         case KVM_S390_VM_MEM_LIMIT_SIZE:
796                 ret = 0;
797                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
798                          kvm->arch.mem_limit);
799                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
800                         ret = -EFAULT;
801                 break;
802         default:
803                 ret = -ENXIO;
804                 break;
805         }
806         return ret;
807 }
808
809 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
810 {
811         int ret;
812         unsigned int idx;
813         switch (attr->attr) {
814         case KVM_S390_VM_MEM_ENABLE_CMMA:
815                 ret = -ENXIO;
816                 if (!sclp.has_cmma)
817                         break;
818
819                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
820                 mutex_lock(&kvm->lock);
821                 if (kvm->created_vcpus)
822                         ret = -EBUSY;
823                 else if (kvm->mm->context.allow_gmap_hpage_1m)
824                         ret = -EINVAL;
825                 else {
826                         kvm->arch.use_cmma = 1;
827                         /* Not compatible with cmma. */
828                         kvm->arch.use_pfmfi = 0;
829                         ret = 0;
830                 }
831                 mutex_unlock(&kvm->lock);
832                 break;
833         case KVM_S390_VM_MEM_CLR_CMMA:
834                 ret = -ENXIO;
835                 if (!sclp.has_cmma)
836                         break;
837                 ret = -EINVAL;
838                 if (!kvm->arch.use_cmma)
839                         break;
840
841                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
842                 mutex_lock(&kvm->lock);
843                 idx = srcu_read_lock(&kvm->srcu);
844                 s390_reset_cmma(kvm->arch.gmap->mm);
845                 srcu_read_unlock(&kvm->srcu, idx);
846                 mutex_unlock(&kvm->lock);
847                 ret = 0;
848                 break;
849         case KVM_S390_VM_MEM_LIMIT_SIZE: {
850                 unsigned long new_limit;
851
852                 if (kvm_is_ucontrol(kvm))
853                         return -EINVAL;
854
855                 if (get_user(new_limit, (u64 __user *)attr->addr))
856                         return -EFAULT;
857
858                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
859                     new_limit > kvm->arch.mem_limit)
860                         return -E2BIG;
861
862                 if (!new_limit)
863                         return -EINVAL;
864
865                 /* gmap_create takes last usable address */
866                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
867                         new_limit -= 1;
868
869                 ret = -EBUSY;
870                 mutex_lock(&kvm->lock);
871                 if (!kvm->created_vcpus) {
872                         /* gmap_create will round the limit up */
873                         struct gmap *new = gmap_create(current->mm, new_limit);
874
875                         if (!new) {
876                                 ret = -ENOMEM;
877                         } else {
878                                 gmap_remove(kvm->arch.gmap);
879                                 new->private = kvm;
880                                 kvm->arch.gmap = new;
881                                 ret = 0;
882                         }
883                 }
884                 mutex_unlock(&kvm->lock);
885                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
886                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
887                          (void *) kvm->arch.gmap->asce);
888                 break;
889         }
890         default:
891                 ret = -ENXIO;
892                 break;
893         }
894         return ret;
895 }
896
897 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
898
899 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
900 {
901         struct kvm_vcpu *vcpu;
902         int i;
903
904         kvm_s390_vcpu_block_all(kvm);
905
906         kvm_for_each_vcpu(i, vcpu, kvm) {
907                 kvm_s390_vcpu_crypto_setup(vcpu);
908                 /* recreate the shadow crycb by leaving the VSIE handler */
909                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
910         }
911
912         kvm_s390_vcpu_unblock_all(kvm);
913 }
914
915 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
916 {
917         mutex_lock(&kvm->lock);
918         switch (attr->attr) {
919         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
920                 if (!test_kvm_facility(kvm, 76)) {
921                         mutex_unlock(&kvm->lock);
922                         return -EINVAL;
923                 }
924                 get_random_bytes(
925                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
926                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
927                 kvm->arch.crypto.aes_kw = 1;
928                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
929                 break;
930         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
931                 if (!test_kvm_facility(kvm, 76)) {
932                         mutex_unlock(&kvm->lock);
933                         return -EINVAL;
934                 }
935                 get_random_bytes(
936                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
937                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
938                 kvm->arch.crypto.dea_kw = 1;
939                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
940                 break;
941         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
942                 if (!test_kvm_facility(kvm, 76)) {
943                         mutex_unlock(&kvm->lock);
944                         return -EINVAL;
945                 }
946                 kvm->arch.crypto.aes_kw = 0;
947                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
948                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
949                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
950                 break;
951         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
952                 if (!test_kvm_facility(kvm, 76)) {
953                         mutex_unlock(&kvm->lock);
954                         return -EINVAL;
955                 }
956                 kvm->arch.crypto.dea_kw = 0;
957                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
958                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
959                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
960                 break;
961         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
962                 if (!ap_instructions_available()) {
963                         mutex_unlock(&kvm->lock);
964                         return -EOPNOTSUPP;
965                 }
966                 kvm->arch.crypto.apie = 1;
967                 break;
968         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
969                 if (!ap_instructions_available()) {
970                         mutex_unlock(&kvm->lock);
971                         return -EOPNOTSUPP;
972                 }
973                 kvm->arch.crypto.apie = 0;
974                 break;
975         default:
976                 mutex_unlock(&kvm->lock);
977                 return -ENXIO;
978         }
979
980         kvm_s390_vcpu_crypto_reset_all(kvm);
981         mutex_unlock(&kvm->lock);
982         return 0;
983 }
984
985 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
986 {
987         int cx;
988         struct kvm_vcpu *vcpu;
989
990         kvm_for_each_vcpu(cx, vcpu, kvm)
991                 kvm_s390_sync_request(req, vcpu);
992 }
993
994 /*
995  * Must be called with kvm->srcu held to avoid races on memslots, and with
996  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
997  */
998 static int kvm_s390_vm_start_migration(struct kvm *kvm)
999 {
1000         struct kvm_memory_slot *ms;
1001         struct kvm_memslots *slots;
1002         unsigned long ram_pages = 0;
1003         int slotnr;
1004
1005         /* migration mode already enabled */
1006         if (kvm->arch.migration_mode)
1007                 return 0;
1008         slots = kvm_memslots(kvm);
1009         if (!slots || !slots->used_slots)
1010                 return -EINVAL;
1011
1012         if (!kvm->arch.use_cmma) {
1013                 kvm->arch.migration_mode = 1;
1014                 return 0;
1015         }
1016         /* mark all the pages in active slots as dirty */
1017         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1018                 ms = slots->memslots + slotnr;
1019                 if (!ms->dirty_bitmap)
1020                         return -EINVAL;
1021                 /*
1022                  * The second half of the bitmap is only used on x86,
1023                  * and would be wasted otherwise, so we put it to good
1024                  * use here to keep track of the state of the storage
1025                  * attributes.
1026                  */
1027                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1028                 ram_pages += ms->npages;
1029         }
1030         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1031         kvm->arch.migration_mode = 1;
1032         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1033         return 0;
1034 }
1035
1036 /*
1037  * Must be called with kvm->slots_lock to avoid races with ourselves and
1038  * kvm_s390_vm_start_migration.
1039  */
1040 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1041 {
1042         /* migration mode already disabled */
1043         if (!kvm->arch.migration_mode)
1044                 return 0;
1045         kvm->arch.migration_mode = 0;
1046         if (kvm->arch.use_cmma)
1047                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1048         return 0;
1049 }
1050
1051 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1052                                      struct kvm_device_attr *attr)
1053 {
1054         int res = -ENXIO;
1055
1056         mutex_lock(&kvm->slots_lock);
1057         switch (attr->attr) {
1058         case KVM_S390_VM_MIGRATION_START:
1059                 res = kvm_s390_vm_start_migration(kvm);
1060                 break;
1061         case KVM_S390_VM_MIGRATION_STOP:
1062                 res = kvm_s390_vm_stop_migration(kvm);
1063                 break;
1064         default:
1065                 break;
1066         }
1067         mutex_unlock(&kvm->slots_lock);
1068
1069         return res;
1070 }
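/*
 * Userspace toggles migration mode through the VM device attribute
 * interface, roughly like this (a sketch, assuming an existing vm_fd):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
 *		err(1, "KVM_SET_DEVICE_ATTR");
 *
 * KVM_S390_VM_MIGRATION_STOP ends migration mode again, and
 * KVM_S390_VM_MIGRATION_STATUS can be read back via KVM_GET_DEVICE_ATTR.
 */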
1071
1072 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1073                                      struct kvm_device_attr *attr)
1074 {
1075         u64 mig = kvm->arch.migration_mode;
1076
1077         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1078                 return -ENXIO;
1079
1080         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1081                 return -EFAULT;
1082         return 0;
1083 }
1084
1085 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1086 {
1087         struct kvm_s390_vm_tod_clock gtod;
1088
1089         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1090                 return -EFAULT;
1091
1092         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1093                 return -EINVAL;
1094         kvm_s390_set_tod_clock(kvm, &gtod);
1095
1096         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1097                 gtod.epoch_idx, gtod.tod);
1098
1099         return 0;
1100 }
1101
1102 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1103 {
1104         u8 gtod_high;
1105
1106         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1107                                            sizeof(gtod_high)))
1108                 return -EFAULT;
1109
1110         if (gtod_high != 0)
1111                 return -EINVAL;
1112         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1113
1114         return 0;
1115 }
1116
1117 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1118 {
1119         struct kvm_s390_vm_tod_clock gtod = { 0 };
1120
1121         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1122                            sizeof(gtod.tod)))
1123                 return -EFAULT;
1124
1125         kvm_s390_set_tod_clock(kvm, &gtod);
1126         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1127         return 0;
1128 }
1129
1130 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1131 {
1132         int ret;
1133
1134         if (attr->flags)
1135                 return -EINVAL;
1136
1137         switch (attr->attr) {
1138         case KVM_S390_VM_TOD_EXT:
1139                 ret = kvm_s390_set_tod_ext(kvm, attr);
1140                 break;
1141         case KVM_S390_VM_TOD_HIGH:
1142                 ret = kvm_s390_set_tod_high(kvm, attr);
1143                 break;
1144         case KVM_S390_VM_TOD_LOW:
1145                 ret = kvm_s390_set_tod_low(kvm, attr);
1146                 break;
1147         default:
1148                 ret = -ENXIO;
1149                 break;
1150         }
1151         return ret;
1152 }
1153
1154 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1155                                    struct kvm_s390_vm_tod_clock *gtod)
1156 {
1157         struct kvm_s390_tod_clock_ext htod;
1158
1159         preempt_disable();
1160
1161         get_tod_clock_ext((char *)&htod);
1162
1163         gtod->tod = htod.tod + kvm->arch.epoch;
1164         gtod->epoch_idx = 0;
1165         if (test_kvm_facility(kvm, 139)) {
1166                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1167                 if (gtod->tod < htod.tod)
1168                         gtod->epoch_idx += 1;
1169         }
1170
1171         preempt_enable();
1172 }
1173
1174 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1175 {
1176         struct kvm_s390_vm_tod_clock gtod;
1177
1178         memset(&gtod, 0, sizeof(gtod));
1179         kvm_s390_get_tod_clock(kvm, &gtod);
1180         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1181                 return -EFAULT;
1182
1183         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1184                 gtod.epoch_idx, gtod.tod);
1185         return 0;
1186 }
1187
1188 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1189 {
1190         u8 gtod_high = 0;
1191
1192         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1193                                          sizeof(gtod_high)))
1194                 return -EFAULT;
1195         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1196
1197         return 0;
1198 }
1199
1200 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1201 {
1202         u64 gtod;
1203
1204         gtod = kvm_s390_get_tod_clock_fast(kvm);
1205         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1206                 return -EFAULT;
1207         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1208
1209         return 0;
1210 }
1211
1212 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1213 {
1214         int ret;
1215
1216         if (attr->flags)
1217                 return -EINVAL;
1218
1219         switch (attr->attr) {
1220         case KVM_S390_VM_TOD_EXT:
1221                 ret = kvm_s390_get_tod_ext(kvm, attr);
1222                 break;
1223         case KVM_S390_VM_TOD_HIGH:
1224                 ret = kvm_s390_get_tod_high(kvm, attr);
1225                 break;
1226         case KVM_S390_VM_TOD_LOW:
1227                 ret = kvm_s390_get_tod_low(kvm, attr);
1228                 break;
1229         default:
1230                 ret = -ENXIO;
1231                 break;
1232         }
1233         return ret;
1234 }
1235
1236 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238         struct kvm_s390_vm_cpu_processor *proc;
1239         u16 lowest_ibc, unblocked_ibc;
1240         int ret = 0;
1241
1242         mutex_lock(&kvm->lock);
1243         if (kvm->created_vcpus) {
1244                 ret = -EBUSY;
1245                 goto out;
1246         }
1247         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1248         if (!proc) {
1249                 ret = -ENOMEM;
1250                 goto out;
1251         }
1252         if (!copy_from_user(proc, (void __user *)attr->addr,
1253                             sizeof(*proc))) {
1254                 kvm->arch.model.cpuid = proc->cpuid;
1255                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1256                 unblocked_ibc = sclp.ibc & 0xfff;
1257                 if (lowest_ibc && proc->ibc) {
1258                         if (proc->ibc > unblocked_ibc)
1259                                 kvm->arch.model.ibc = unblocked_ibc;
1260                         else if (proc->ibc < lowest_ibc)
1261                                 kvm->arch.model.ibc = lowest_ibc;
1262                         else
1263                                 kvm->arch.model.ibc = proc->ibc;
1264                 }
1265                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1266                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1267                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1268                          kvm->arch.model.ibc,
1269                          kvm->arch.model.cpuid);
1270                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1271                          kvm->arch.model.fac_list[0],
1272                          kvm->arch.model.fac_list[1],
1273                          kvm->arch.model.fac_list[2]);
1274         } else
1275                 ret = -EFAULT;
1276         kfree(proc);
1277 out:
1278         mutex_unlock(&kvm->lock);
1279         return ret;
1280 }
1281
1282 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1283                                        struct kvm_device_attr *attr)
1284 {
1285         struct kvm_s390_vm_cpu_feat data;
1286
1287         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1288                 return -EFAULT;
1289         if (!bitmap_subset((unsigned long *) data.feat,
1290                            kvm_s390_available_cpu_feat,
1291                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1292                 return -EINVAL;
1293
1294         mutex_lock(&kvm->lock);
1295         if (kvm->created_vcpus) {
1296                 mutex_unlock(&kvm->lock);
1297                 return -EBUSY;
1298         }
1299         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1300                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1301         mutex_unlock(&kvm->lock);
1302         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1303                          data.feat[0],
1304                          data.feat[1],
1305                          data.feat[2]);
1306         return 0;
1307 }
1308
1309 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1310                                           struct kvm_device_attr *attr)
1311 {
1312         mutex_lock(&kvm->lock);
1313         if (kvm->created_vcpus) {
1314                 mutex_unlock(&kvm->lock);
1315                 return -EBUSY;
1316         }
1317
1318         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1319                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1320                 mutex_unlock(&kvm->lock);
1321                 return -EFAULT;
1322         }
1323         mutex_unlock(&kvm->lock);
1324
1325         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1327                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1328                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1329                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1330         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1331                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1332                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1333         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1334                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1335                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1336         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1338                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1339         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1342         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1345         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1348         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1351         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1354         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1357         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1360         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1363         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1366         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1369         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1377         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1381                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1382
1383         return 0;
1384 }
1385
1386 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1387 {
1388         int ret = -ENXIO;
1389
1390         switch (attr->attr) {
1391         case KVM_S390_VM_CPU_PROCESSOR:
1392                 ret = kvm_s390_set_processor(kvm, attr);
1393                 break;
1394         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1395                 ret = kvm_s390_set_processor_feat(kvm, attr);
1396                 break;
1397         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1398                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1399                 break;
1400         }
1401         return ret;
1402 }
1403
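/*
 * Copy the guest-visible CPU model (cpuid, ibc and facility list) currently
 * configured for this VM to the user buffer supplied in the attribute.
 */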
1404 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1405 {
1406         struct kvm_s390_vm_cpu_processor *proc;
1407         int ret = 0;
1408
1409         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1410         if (!proc) {
1411                 ret = -ENOMEM;
1412                 goto out;
1413         }
1414         proc->cpuid = kvm->arch.model.cpuid;
1415         proc->ibc = kvm->arch.model.ibc;
1416         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1417                S390_ARCH_FAC_LIST_SIZE_BYTE);
1418         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1419                  kvm->arch.model.ibc,
1420                  kvm->arch.model.cpuid);
1421         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1422                  kvm->arch.model.fac_list[0],
1423                  kvm->arch.model.fac_list[1],
1424                  kvm->arch.model.fac_list[2]);
1425         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1426                 ret = -EFAULT;
1427         kfree(proc);
1428 out:
1429         return ret;
1430 }
1431
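/*
 * Report the host machine's CPU model: the real cpuid and ibc, the facility
 * mask usable for guests and the full host facility list from STFLE.
 */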
1432 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1433 {
1434         struct kvm_s390_vm_cpu_machine *mach;
1435         int ret = 0;
1436
1437         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1438         if (!mach) {
1439                 ret = -ENOMEM;
1440                 goto out;
1441         }
1442         get_cpu_id((struct cpuid *) &mach->cpuid);
1443         mach->ibc = sclp.ibc;
1444         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1445                S390_ARCH_FAC_LIST_SIZE_BYTE);
1446         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1447                sizeof(S390_lowcore.stfle_fac_list));
1448         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1449                  kvm->arch.model.ibc,
1450                  kvm->arch.model.cpuid);
1451         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1452                  mach->fac_mask[0],
1453                  mach->fac_mask[1],
1454                  mach->fac_mask[2]);
1455         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1456                  mach->fac_list[0],
1457                  mach->fac_list[1],
1458                  mach->fac_list[2]);
1459         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1460                 ret = -EFAULT;
1461         kfree(mach);
1462 out:
1463         return ret;
1464 }
1465
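/* Copy the CPU features currently configured for the guest to user space. */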
1466 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1467                                        struct kvm_device_attr *attr)
1468 {
1469         struct kvm_s390_vm_cpu_feat data;
1470
1471         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1472                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1473         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1474                 return -EFAULT;
1475         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1476                          data.feat[0],
1477                          data.feat[1],
1478                          data.feat[2]);
1479         return 0;
1480 }
1481
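/* Copy the CPU features that the host can offer to guests to user space. */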
1482 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1483                                      struct kvm_device_attr *attr)
1484 {
1485         struct kvm_s390_vm_cpu_feat data;
1486
1487         bitmap_copy((unsigned long *) data.feat,
1488                     kvm_s390_available_cpu_feat,
1489                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1490         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1491                 return -EFAULT;
1492         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1493                          data.feat[0],
1494                          data.feat[1],
1495                          data.feat[2]);
1496         return 0;
1497 }
1498
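/*
 * Copy the subfunction blocks (PLO, PTFF, crypto, SORTL and DFLTCC query
 * results) configured for the guest to user space.
 */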
1499 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1500                                           struct kvm_device_attr *attr)
1501 {
1502         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1503             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1504                 return -EFAULT;
1505
1506         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1510                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1511         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1513                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1514         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1516                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1517         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1520         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1523         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1526         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1529         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1532         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1535         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1538         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1541         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1544         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1547         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1550         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1558         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1563
1564         return 0;
1565 }
1566
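/*
 * Copy the subfunction blocks that are available on the host to user space.
 */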
1567 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1568                                         struct kvm_device_attr *attr)
1569 {
1570         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1571             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1572                 return -EFAULT;
1573
1574         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1575                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1576                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1577                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1578                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1579         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1580                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1581                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1582         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1583                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1584                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1585         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1586                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1587                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1588         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1589                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1590                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1591         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1593                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1594         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1596                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1597         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1598                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1599                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1600         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1603         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1604                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1605                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1606         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1607                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1608                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1609         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1610                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1611                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1612         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1613                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1615         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1618         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1625                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1626         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1627                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1630                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1631
1632         return 0;
1633 }
1634
1635 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1636 {
1637         int ret = -ENXIO;
1638
1639         switch (attr->attr) {
1640         case KVM_S390_VM_CPU_PROCESSOR:
1641                 ret = kvm_s390_get_processor(kvm, attr);
1642                 break;
1643         case KVM_S390_VM_CPU_MACHINE:
1644                 ret = kvm_s390_get_machine(kvm, attr);
1645                 break;
1646         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1647                 ret = kvm_s390_get_processor_feat(kvm, attr);
1648                 break;
1649         case KVM_S390_VM_CPU_MACHINE_FEAT:
1650                 ret = kvm_s390_get_machine_feat(kvm, attr);
1651                 break;
1652         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1653                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1654                 break;
1655         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1656                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1657                 break;
1658         }
1659         return ret;
1660 }
1661
1662 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1663 {
1664         int ret;
1665
1666         switch (attr->group) {
1667         case KVM_S390_VM_MEM_CTRL:
1668                 ret = kvm_s390_set_mem_control(kvm, attr);
1669                 break;
1670         case KVM_S390_VM_TOD:
1671                 ret = kvm_s390_set_tod(kvm, attr);
1672                 break;
1673         case KVM_S390_VM_CPU_MODEL:
1674                 ret = kvm_s390_set_cpu_model(kvm, attr);
1675                 break;
1676         case KVM_S390_VM_CRYPTO:
1677                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1678                 break;
1679         case KVM_S390_VM_MIGRATION:
1680                 ret = kvm_s390_vm_set_migration(kvm, attr);
1681                 break;
1682         default:
1683                 ret = -ENXIO;
1684                 break;
1685         }
1686
1687         return ret;
1688 }
1689
1690 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1691 {
1692         int ret;
1693
1694         switch (attr->group) {
1695         case KVM_S390_VM_MEM_CTRL:
1696                 ret = kvm_s390_get_mem_control(kvm, attr);
1697                 break;
1698         case KVM_S390_VM_TOD:
1699                 ret = kvm_s390_get_tod(kvm, attr);
1700                 break;
1701         case KVM_S390_VM_CPU_MODEL:
1702                 ret = kvm_s390_get_cpu_model(kvm, attr);
1703                 break;
1704         case KVM_S390_VM_MIGRATION:
1705                 ret = kvm_s390_vm_get_migration(kvm, attr);
1706                 break;
1707         default:
1708                 ret = -ENXIO;
1709                 break;
1710         }
1711
1712         return ret;
1713 }
1714
1715 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1716 {
1717         int ret;
1718
1719         switch (attr->group) {
1720         case KVM_S390_VM_MEM_CTRL:
1721                 switch (attr->attr) {
1722                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1723                 case KVM_S390_VM_MEM_CLR_CMMA:
1724                         ret = sclp.has_cmma ? 0 : -ENXIO;
1725                         break;
1726                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1727                         ret = 0;
1728                         break;
1729                 default:
1730                         ret = -ENXIO;
1731                         break;
1732                 }
1733                 break;
1734         case KVM_S390_VM_TOD:
1735                 switch (attr->attr) {
1736                 case KVM_S390_VM_TOD_LOW:
1737                 case KVM_S390_VM_TOD_HIGH:
1738                         ret = 0;
1739                         break;
1740                 default:
1741                         ret = -ENXIO;
1742                         break;
1743                 }
1744                 break;
1745         case KVM_S390_VM_CPU_MODEL:
1746                 switch (attr->attr) {
1747                 case KVM_S390_VM_CPU_PROCESSOR:
1748                 case KVM_S390_VM_CPU_MACHINE:
1749                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1750                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1751                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1752                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1753                         ret = 0;
1754                         break;
1755                 default:
1756                         ret = -ENXIO;
1757                         break;
1758                 }
1759                 break;
1760         case KVM_S390_VM_CRYPTO:
1761                 switch (attr->attr) {
1762                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1763                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1764                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1765                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1766                         ret = 0;
1767                         break;
1768                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1769                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1770                         ret = ap_instructions_available() ? 0 : -ENXIO;
1771                         break;
1772                 default:
1773                         ret = -ENXIO;
1774                         break;
1775                 }
1776                 break;
1777         case KVM_S390_VM_MIGRATION:
1778                 ret = 0;
1779                 break;
1780         default:
1781                 ret = -ENXIO;
1782                 break;
1783         }
1784
1785         return ret;
1786 }
1787
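/*
 * Read the storage keys for a range of guest frames and copy them to user
 * space. Returns KVM_S390_GET_SKEYS_NONE if the guest does not use keys.
 */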
1788 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1789 {
1790         uint8_t *keys;
1791         uint64_t hva;
1792         int srcu_idx, i, r = 0;
1793
1794         if (args->flags != 0)
1795                 return -EINVAL;
1796
1797         /* Is this guest using storage keys? */
1798         if (!mm_uses_skeys(current->mm))
1799                 return KVM_S390_GET_SKEYS_NONE;
1800
1801         /* Enforce sane limit on memory allocation */
1802         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1803                 return -EINVAL;
1804
1805         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1806         if (!keys)
1807                 return -ENOMEM;
1808
1809         down_read(&current->mm->mmap_sem);
1810         srcu_idx = srcu_read_lock(&kvm->srcu);
1811         for (i = 0; i < args->count; i++) {
1812                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1813                 if (kvm_is_error_hva(hva)) {
1814                         r = -EFAULT;
1815                         break;
1816                 }
1817
1818                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1819                 if (r)
1820                         break;
1821         }
1822         srcu_read_unlock(&kvm->srcu, srcu_idx);
1823         up_read(&current->mm->mmap_sem);
1824
1825         if (!r) {
1826                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1827                                  sizeof(uint8_t) * args->count);
1828                 if (r)
1829                         r = -EFAULT;
1830         }
1831
1832         kvfree(keys);
1833         return r;
1834 }
1835
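/*
 * Copy storage keys from user space and set them for a range of guest
 * frames, enabling storage key handling for the guest if necessary.
 */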
1836 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1837 {
1838         uint8_t *keys;
1839         uint64_t hva;
1840         int srcu_idx, i, r = 0;
1841         bool unlocked;
1842
1843         if (args->flags != 0)
1844                 return -EINVAL;
1845
1846         /* Enforce sane limit on memory allocation */
1847         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1848                 return -EINVAL;
1849
1850         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1851         if (!keys)
1852                 return -ENOMEM;
1853
1854         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1855                            sizeof(uint8_t) * args->count);
1856         if (r) {
1857                 r = -EFAULT;
1858                 goto out;
1859         }
1860
1861         /* Enable storage key handling for the guest */
1862         r = s390_enable_skey();
1863         if (r)
1864                 goto out;
1865
1866         i = 0;
1867         down_read(&current->mm->mmap_sem);
1868         srcu_idx = srcu_read_lock(&kvm->srcu);
1869         while (i < args->count) {
1870                 unlocked = false;
1871                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1872                 if (kvm_is_error_hva(hva)) {
1873                         r = -EFAULT;
1874                         break;
1875                 }
1876
1877                 /* Lowest order bit is reserved */
1878                 if (keys[i] & 0x01) {
1879                         r = -EINVAL;
1880                         break;
1881                 }
1882
1883                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1884                 if (r) {
1885                         r = fixup_user_fault(current, current->mm, hva,
1886                                              FAULT_FLAG_WRITE, &unlocked);
1887                         if (r)
1888                                 break;
1889                 }
1890                 if (!r)
1891                         i++;
1892         }
1893         srcu_read_unlock(&kvm->srcu, srcu_idx);
1894         up_read(&current->mm->mmap_sem);
1895 out:
1896         kvfree(keys);
1897         return r;
1898 }
1899
1900 /*
1901  * Base address and length must be sent at the start of each block; therefore
1902  * it's cheaper to send some clean data, as long as it's less than the size of
1903  * two longs.
1904  */
1905 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1906 /* Upper limit for the CMMA buffer, kept consistent with KVM_S390_SKEYS_MAX */
1907 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1908
1909 /*
1910  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1911  * address falls in a hole. In that case the index of one of the memslots
1912  * bordering the hole is returned.
1913  */
1914 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1915 {
1916         int start = 0, end = slots->used_slots;
1917         int slot = atomic_read(&slots->lru_slot);
1918         struct kvm_memory_slot *memslots = slots->memslots;
1919
1920         if (gfn >= memslots[slot].base_gfn &&
1921             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1922                 return slot;
1923
1924         while (start < end) {
1925                 slot = start + (end - start) / 2;
1926
1927                 if (gfn >= memslots[slot].base_gfn)
1928                         end = slot;
1929                 else
1930                         start = slot + 1;
1931         }
1932
1933         if (gfn >= memslots[start].base_gfn &&
1934             gfn < memslots[start].base_gfn + memslots[start].npages) {
1935                 atomic_set(&slots->lru_slot, start);
1936         }
1937
1938         return start;
1939 }
1940
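/*
 * Peek mode: read the CMMA attributes of up to bufsize consecutive guest
 * frames starting at args->start_gfn, without consulting the CMMA dirty
 * bitmap.
 */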
1941 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1942                               u8 *res, unsigned long bufsize)
1943 {
1944         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1945
1946         args->count = 0;
1947         while (args->count < bufsize) {
1948                 hva = gfn_to_hva(kvm, cur_gfn);
1949                 /*
1950                  * We return an error if the first value was invalid, but we
1951                  * return successfully if at least one value was copied.
1952                  */
1953                 if (kvm_is_error_hva(hva))
1954                         return args->count ? 0 : -EFAULT;
1955                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1956                         pgstev = 0;
1957                 res[args->count++] = (pgstev >> 24) & 0x43;
1958                 cur_gfn++;
1959         }
1960
1961         return 0;
1962 }
1963
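/*
 * Return the guest frame number of the next page that is marked dirty in
 * the CMMA dirty bitmap, starting the search at cur_gfn.
 */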
1964 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1965                                               unsigned long cur_gfn)
1966 {
1967         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1968         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1969         unsigned long ofs = cur_gfn - ms->base_gfn;
1970
1971         if (ms->base_gfn + ms->npages <= cur_gfn) {
1972                 slotidx--;
1973                 /* If we are above the highest slot, wrap around */
1974                 if (slotidx < 0)
1975                         slotidx = slots->used_slots - 1;
1976
1977                 ms = slots->memslots + slotidx;
1978                 ofs = 0;
1979         }
1980         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1981         while ((slotidx > 0) && (ofs >= ms->npages)) {
1982                 slotidx--;
1983                 ms = slots->memslots + slotidx;
1984                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1985         }
1986         return ms->base_gfn + ofs;
1987 }
1988
1989 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1990                              u8 *res, unsigned long bufsize)
1991 {
1992         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1993         struct kvm_memslots *slots = kvm_memslots(kvm);
1994         struct kvm_memory_slot *ms;
1995
1996         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1997         ms = gfn_to_memslot(kvm, cur_gfn);
1998         args->count = 0;
1999         args->start_gfn = cur_gfn;
2000         if (!ms)
2001                 return 0;
2002         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2003         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2004
2005         while (args->count < bufsize) {
2006                 hva = gfn_to_hva(kvm, cur_gfn);
2007                 if (kvm_is_error_hva(hva))
2008                         return 0;
2009                 /* Decrement only if we actually flipped the bit to 0 */
2010                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2011                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2012                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2013                         pgstev = 0;
2014                 /* Save the value */
2015                 res[args->count++] = (pgstev >> 24) & 0x43;
2016                 /* If the next bit is too far away, stop. */
2017                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2018                         return 0;
2019                 /* If we reached the previous "next", find the next one */
2020                 if (cur_gfn == next_gfn)
2021                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2022                 /* Reached the end of memory or of the buffer, stop */
2023                 if ((next_gfn >= mem_end) ||
2024                     (next_gfn - args->start_gfn >= bufsize))
2025                         return 0;
2026                 cur_gfn++;
2027                 /* Reached the end of the current memslot, take the next one. */
2028                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2029                         ms = gfn_to_memslot(kvm, cur_gfn);
2030                         if (!ms)
2031                                 return 0;
2032                 }
2033         }
2034         return 0;
2035 }
2036
2037 /*
2038  * This function searches for the next page with dirty CMMA attributes, and
2039  * saves the attributes in the buffer up to either the end of the buffer or
2040  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2041  * no trailing clean bytes are saved.
2042  * If no dirty bits were found, or if CMMA was not enabled or used, the
2043  * output buffer will indicate a length of 0.
2044  */
2045 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2046                                   struct kvm_s390_cmma_log *args)
2047 {
2048         unsigned long bufsize;
2049         int srcu_idx, peek, ret;
2050         u8 *values;
2051
2052         if (!kvm->arch.use_cmma)
2053                 return -ENXIO;
2054         /* Invalid/unsupported flags were specified */
2055         if (args->flags & ~KVM_S390_CMMA_PEEK)
2056                 return -EINVAL;
2057         /* A non-peek query is only valid while migration mode is active */
2058         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2059         if (!peek && !kvm->arch.migration_mode)
2060                 return -EINVAL;
2061         /* CMMA is disabled or was not used, or the buffer has length zero */
2062         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2063         if (!bufsize || !kvm->mm->context.uses_cmm) {
2064                 memset(args, 0, sizeof(*args));
2065                 return 0;
2066         }
2067         /* We are not peeking, and there are no dirty pages */
2068         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2069                 memset(args, 0, sizeof(*args));
2070                 return 0;
2071         }
2072
2073         values = vmalloc(bufsize);
2074         if (!values)
2075                 return -ENOMEM;
2076
2077         down_read(&kvm->mm->mmap_sem);
2078         srcu_idx = srcu_read_lock(&kvm->srcu);
2079         if (peek)
2080                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2081         else
2082                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2083         srcu_read_unlock(&kvm->srcu, srcu_idx);
2084         up_read(&kvm->mm->mmap_sem);
2085
2086         if (kvm->arch.migration_mode)
2087                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2088         else
2089                 args->remaining = 0;
2090
2091         if (copy_to_user((void __user *)args->values, values, args->count))
2092                 ret = -EFAULT;
2093
2094         vfree(values);
2095         return ret;
2096 }
2097
2098 /*
2099  * This function sets the CMMA attributes for the given pages. If the input
2100  * buffer has zero length, no action is taken, otherwise the attributes are
2101  * set and the mm->context.uses_cmm flag is set.
2102  */
2103 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2104                                   const struct kvm_s390_cmma_log *args)
2105 {
2106         unsigned long hva, mask, pgstev, i;
2107         uint8_t *bits;
2108         int srcu_idx, r = 0;
2109
2110         mask = args->mask;
2111
2112         if (!kvm->arch.use_cmma)
2113                 return -ENXIO;
2114         /* invalid/unsupported flags */
2115         if (args->flags != 0)
2116                 return -EINVAL;
2117         /* Enforce sane limit on memory allocation */
2118         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2119                 return -EINVAL;
2120         /* Nothing to do */
2121         if (args->count == 0)
2122                 return 0;
2123
2124         bits = vmalloc(array_size(sizeof(*bits), args->count));
2125         if (!bits)
2126                 return -ENOMEM;
2127
2128         r = copy_from_user(bits, (void __user *)args->values, args->count);
2129         if (r) {
2130                 r = -EFAULT;
2131                 goto out;
2132         }
2133
2134         down_read(&kvm->mm->mmap_sem);
2135         srcu_idx = srcu_read_lock(&kvm->srcu);
2136         for (i = 0; i < args->count; i++) {
2137                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2138                 if (kvm_is_error_hva(hva)) {
2139                         r = -EFAULT;
2140                         break;
2141                 }
2142
2143                 pgstev = bits[i];
2144                 pgstev = pgstev << 24;
2145                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2146                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2147         }
2148         srcu_read_unlock(&kvm->srcu, srcu_idx);
2149         up_read(&kvm->mm->mmap_sem);
2150
2151         if (!kvm->mm->context.uses_cmm) {
2152                 down_write(&kvm->mm->mmap_sem);
2153                 kvm->mm->context.uses_cmm = 1;
2154                 up_write(&kvm->mm->mmap_sem);
2155         }
2156 out:
2157         vfree(bits);
2158         return r;
2159 }
2160
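/* Dispatch the VM-scoped ioctls issued on the VM file descriptor. */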
2161 long kvm_arch_vm_ioctl(struct file *filp,
2162                        unsigned int ioctl, unsigned long arg)
2163 {
2164         struct kvm *kvm = filp->private_data;
2165         void __user *argp = (void __user *)arg;
2166         struct kvm_device_attr attr;
2167         int r;
2168
2169         switch (ioctl) {
2170         case KVM_S390_INTERRUPT: {
2171                 struct kvm_s390_interrupt s390int;
2172
2173                 r = -EFAULT;
2174                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2175                         break;
2176                 r = kvm_s390_inject_vm(kvm, &s390int);
2177                 break;
2178         }
2179         case KVM_CREATE_IRQCHIP: {
2180                 struct kvm_irq_routing_entry routing;
2181
2182                 r = -EINVAL;
2183                 if (kvm->arch.use_irqchip) {
2184                         /* Set up dummy routing. */
2185                         memset(&routing, 0, sizeof(routing));
2186                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2187                 }
2188                 break;
2189         }
2190         case KVM_SET_DEVICE_ATTR: {
2191                 r = -EFAULT;
2192                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2193                         break;
2194                 r = kvm_s390_vm_set_attr(kvm, &attr);
2195                 break;
2196         }
2197         case KVM_GET_DEVICE_ATTR: {
2198                 r = -EFAULT;
2199                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2200                         break;
2201                 r = kvm_s390_vm_get_attr(kvm, &attr);
2202                 break;
2203         }
2204         case KVM_HAS_DEVICE_ATTR: {
2205                 r = -EFAULT;
2206                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2207                         break;
2208                 r = kvm_s390_vm_has_attr(kvm, &attr);
2209                 break;
2210         }
2211         case KVM_S390_GET_SKEYS: {
2212                 struct kvm_s390_skeys args;
2213
2214                 r = -EFAULT;
2215                 if (copy_from_user(&args, argp,
2216                                    sizeof(struct kvm_s390_skeys)))
2217                         break;
2218                 r = kvm_s390_get_skeys(kvm, &args);
2219                 break;
2220         }
2221         case KVM_S390_SET_SKEYS: {
2222                 struct kvm_s390_skeys args;
2223
2224                 r = -EFAULT;
2225                 if (copy_from_user(&args, argp,
2226                                    sizeof(struct kvm_s390_skeys)))
2227                         break;
2228                 r = kvm_s390_set_skeys(kvm, &args);
2229                 break;
2230         }
2231         case KVM_S390_GET_CMMA_BITS: {
2232                 struct kvm_s390_cmma_log args;
2233
2234                 r = -EFAULT;
2235                 if (copy_from_user(&args, argp, sizeof(args)))
2236                         break;
2237                 mutex_lock(&kvm->slots_lock);
2238                 r = kvm_s390_get_cmma_bits(kvm, &args);
2239                 mutex_unlock(&kvm->slots_lock);
2240                 if (!r) {
2241                         r = copy_to_user(argp, &args, sizeof(args));
2242                         if (r)
2243                                 r = -EFAULT;
2244                 }
2245                 break;
2246         }
2247         case KVM_S390_SET_CMMA_BITS: {
2248                 struct kvm_s390_cmma_log args;
2249
2250                 r = -EFAULT;
2251                 if (copy_from_user(&args, argp, sizeof(args)))
2252                         break;
2253                 mutex_lock(&kvm->slots_lock);
2254                 r = kvm_s390_set_cmma_bits(kvm, &args);
2255                 mutex_unlock(&kvm->slots_lock);
2256                 break;
2257         }
2258         default:
2259                 r = -ENOTTY;
2260         }
2261
2262         return r;
2263 }
2264
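/*
 * Query the AP configuration to find out whether the AP extended
 * addressing (APXA) facility is installed on the host.
 */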
2265 static int kvm_s390_apxa_installed(void)
2266 {
2267         struct ap_config_info info;
2268
2269         if (ap_instructions_available()) {
2270                 if (ap_qci(&info) == 0)
2271                         return info.apxa;
2272         }
2273
2274         return 0;
2275 }
2276
2277 /*
2278  * The format of the crypto control block (CRYCB) is specified in the 3 low
2279  * order bits of the CRYCB designation (CRYCBD) field as follows:
2280  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2281  *           AP extended addressing (APXA) facility is installed.
2282  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2283  * Format 2: Both the APXA and MSAX3 facilities are installed.
2284  */
2285 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2286 {
2287         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2288
2289         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2290         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2291
2292         /* Check whether MSAX3 is installed */
2293         if (!test_kvm_facility(kvm, 76))
2294                 return;
2295
2296         if (kvm_s390_apxa_installed())
2297                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2298         else
2299                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2300 }
2301
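/*
 * Set the AP adapter, domain and control-domain masks in the CRYCB of a VM
 * while all vcpus are blocked, then request a shadow CRYCB rebuild.
 */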
2302 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2303                                unsigned long *aqm, unsigned long *adm)
2304 {
2305         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2306
2307         mutex_lock(&kvm->lock);
2308         kvm_s390_vcpu_block_all(kvm);
2309
2310         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2311         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2312                 memcpy(crycb->apcb1.apm, apm, 32);
2313                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2314                          apm[0], apm[1], apm[2], apm[3]);
2315                 memcpy(crycb->apcb1.aqm, aqm, 32);
2316                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2317                          aqm[0], aqm[1], aqm[2], aqm[3]);
2318                 memcpy(crycb->apcb1.adm, adm, 32);
2319                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2320                          adm[0], adm[1], adm[2], adm[3]);
2321                 break;
2322         case CRYCB_FORMAT1:
2323         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2324                 memcpy(crycb->apcb0.apm, apm, 8);
2325                 memcpy(crycb->apcb0.aqm, aqm, 2);
2326                 memcpy(crycb->apcb0.adm, adm, 2);
2327                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2328                          apm[0], *((unsigned short *)aqm),
2329                          *((unsigned short *)adm));
2330                 break;
2331         default:        /* Cannot happen */
2332                 break;
2333         }
2334
2335         /* recreate the shadow crycb for each vcpu */
2336         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2337         kvm_s390_vcpu_unblock_all(kvm);
2338         mutex_unlock(&kvm->lock);
2339 }
2340 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2341
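/*
 * Clear all AP masks in the CRYCB of a VM while all vcpus are blocked, then
 * request a shadow CRYCB rebuild.
 */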
2342 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2343 {
2344         mutex_lock(&kvm->lock);
2345         kvm_s390_vcpu_block_all(kvm);
2346
2347         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2348                sizeof(kvm->arch.crypto.crycb->apcb0));
2349         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2350                sizeof(kvm->arch.crypto.crycb->apcb1));
2351
2352         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2353         /* recreate the shadow crycb for each vcpu */
2354         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2355         kvm_s390_vcpu_unblock_all(kvm);
2356         mutex_unlock(&kvm->lock);
2357 }
2358 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2359
2360 static u64 kvm_s390_get_initial_cpuid(void)
2361 {
2362         struct cpuid cpuid;
2363
2364         get_cpu_id(&cpuid);
2365         cpuid.version = 0xff;
2366         return *((u64 *) &cpuid);
2367 }
2368
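/*
 * Set up the CRYCB for a new VM: choose the CRYCB format and, if MSAX3 is
 * available, enable protected key functions and generate random wrapping
 * key masks.
 */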
2369 static void kvm_s390_crypto_init(struct kvm *kvm)
2370 {
2371         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2372         kvm_s390_set_crycb_format(kvm);
2373
2374         if (!test_kvm_facility(kvm, 76))
2375                 return;
2376
2377         /* Enable AES/DEA protected key functions by default */
2378         kvm->arch.crypto.aes_kw = 1;
2379         kvm->arch.crypto.dea_kw = 1;
2380         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2381                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2382         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2383                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2384 }
2385
2386 static void sca_dispose(struct kvm *kvm)
2387 {
2388         if (kvm->arch.use_esca)
2389                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2390         else
2391                 free_page((unsigned long)(kvm->arch.sca));
2392         kvm->arch.sca = NULL;
2393 }
2394
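/*
 * Create and initialize a new VM: allocate the SCA and debug facility, set
 * up the CPU model, crypto settings, interrupt handling and the guest
 * address space (gmap).
 */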
2395 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2396 {
2397         gfp_t alloc_flags = GFP_KERNEL;
2398         int i, rc;
2399         char debug_name[16];
2400         static unsigned long sca_offset;
2401
2402         rc = -EINVAL;
2403 #ifdef CONFIG_KVM_S390_UCONTROL
2404         if (type & ~KVM_VM_S390_UCONTROL)
2405                 goto out_err;
2406         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2407                 goto out_err;
2408 #else
2409         if (type)
2410                 goto out_err;
2411 #endif
2412
2413         rc = s390_enable_sie();
2414         if (rc)
2415                 goto out_err;
2416
2417         rc = -ENOMEM;
2418
2419         if (!sclp.has_64bscao)
2420                 alloc_flags |= GFP_DMA;
2421         rwlock_init(&kvm->arch.sca_lock);
2422         /* start with basic SCA */
2423         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2424         if (!kvm->arch.sca)
2425                 goto out_err;
2426         mutex_lock(&kvm_lock);
2427         sca_offset += 16;
2428         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2429                 sca_offset = 0;
2430         kvm->arch.sca = (struct bsca_block *)
2431                         ((char *) kvm->arch.sca + sca_offset);
2432         mutex_unlock(&kvm_lock);
2433
2434         sprintf(debug_name, "kvm-%u", current->pid);
2435
2436         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2437         if (!kvm->arch.dbf)
2438                 goto out_err;
2439
2440         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2441         kvm->arch.sie_page2 =
2442              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2443         if (!kvm->arch.sie_page2)
2444                 goto out_err;
2445
2446         kvm->arch.sie_page2->kvm = kvm;
2447         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2448
2449         for (i = 0; i < kvm_s390_fac_size(); i++) {
2450                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2451                                               (kvm_s390_fac_base[i] |
2452                                                kvm_s390_fac_ext[i]);
2453                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2454                                               kvm_s390_fac_base[i];
2455         }
2456         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2457
2458         /* we are always in czam mode - even on pre z14 machines */
2459         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2460         set_kvm_facility(kvm->arch.model.fac_list, 138);
2461         /* we emulate STHYI in kvm */
2462         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2463         set_kvm_facility(kvm->arch.model.fac_list, 74);
2464         if (MACHINE_HAS_TLB_GUEST) {
2465                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2466                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2467         }
2468
2469         if (css_general_characteristics.aiv && test_facility(65))
2470                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2471
2472         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2473         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2474
2475         kvm_s390_crypto_init(kvm);
2476
2477         mutex_init(&kvm->arch.float_int.ais_lock);
2478         spin_lock_init(&kvm->arch.float_int.lock);
2479         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2480                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2481         init_waitqueue_head(&kvm->arch.ipte_wq);
2482         mutex_init(&kvm->arch.ipte_mutex);
2483
2484         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2485         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2486
2487         if (type & KVM_VM_S390_UCONTROL) {
2488                 kvm->arch.gmap = NULL;
2489                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2490         } else {
2491                 if (sclp.hamax == U64_MAX)
2492                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2493                 else
2494                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2495                                                     sclp.hamax + 1);
2496                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2497                 if (!kvm->arch.gmap)
2498                         goto out_err;
2499                 kvm->arch.gmap->private = kvm;
2500                 kvm->arch.gmap->pfault_enabled = 0;
2501         }
2502
2503         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2504         kvm->arch.use_skf = sclp.has_skey;
2505         spin_lock_init(&kvm->arch.start_stop_lock);
2506         kvm_s390_vsie_init(kvm);
2507         kvm_s390_gisa_init(kvm);
2508         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2509
2510         return 0;
2511 out_err:
2512         free_page((unsigned long)kvm->arch.sie_page2);
2513         debug_unregister(kvm->arch.dbf);
2514         sca_dispose(kvm);
2515         KVM_EVENT(3, "creation of vm failed: %d", rc);
2516         return rc;
2517 }
2518
2519 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2520 {
2521         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2522         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2523         kvm_s390_clear_local_irqs(vcpu);
2524         kvm_clear_async_pf_completion_queue(vcpu);
2525         if (!kvm_is_ucontrol(vcpu->kvm))
2526                 sca_del_vcpu(vcpu);
2527
2528         if (kvm_is_ucontrol(vcpu->kvm))
2529                 gmap_remove(vcpu->arch.gmap);
2530
2531         if (vcpu->kvm->arch.use_cmma)
2532                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2533         free_page((unsigned long)(vcpu->arch.sie_block));
2534 }
2535
2536 static void kvm_free_vcpus(struct kvm *kvm)
2537 {
2538         unsigned int i;
2539         struct kvm_vcpu *vcpu;
2540
2541         kvm_for_each_vcpu(i, vcpu, kvm)
2542                 kvm_vcpu_destroy(vcpu);
2543
2544         mutex_lock(&kvm->lock);
2545         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2546                 kvm->vcpus[i] = NULL;
2547
2548         atomic_set(&kvm->online_vcpus, 0);
2549         mutex_unlock(&kvm->lock);
2550 }
2551
2552 void kvm_arch_destroy_vm(struct kvm *kvm)
2553 {
2554         kvm_free_vcpus(kvm);
2555         sca_dispose(kvm);
2556         debug_unregister(kvm->arch.dbf);
2557         kvm_s390_gisa_destroy(kvm);
2558         free_page((unsigned long)kvm->arch.sie_page2);
2559         if (!kvm_is_ucontrol(kvm))
2560                 gmap_remove(kvm->arch.gmap);
2561         kvm_s390_destroy_adapters(kvm);
2562         kvm_s390_clear_float_irqs(kvm);
2563         kvm_s390_vsie_destroy(kvm);
2564         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2565 }
2566
2567 /* Section: vcpu related */
2568 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2569 {
2570         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2571         if (!vcpu->arch.gmap)
2572                 return -ENOMEM;
2573         vcpu->arch.gmap->private = vcpu->kvm;
2574
2575         return 0;
2576 }
2577
2578 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2579 {
2580         if (!kvm_s390_use_sca_entries())
2581                 return;
2582         read_lock(&vcpu->kvm->arch.sca_lock);
2583         if (vcpu->kvm->arch.use_esca) {
2584                 struct esca_block *sca = vcpu->kvm->arch.sca;
2585
2586                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2587                 sca->cpu[vcpu->vcpu_id].sda = 0;
2588         } else {
2589                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2590
2591                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2592                 sca->cpu[vcpu->vcpu_id].sda = 0;
2593         }
2594         read_unlock(&vcpu->kvm->arch.sca_lock);
2595 }
2596
2597 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2598 {
2599         if (!kvm_s390_use_sca_entries()) {
2600                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2601
2602                 /* we still need the basic sca for the ipte control */
2603                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2604                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2605                 return;
2606         }
2607         read_lock(&vcpu->kvm->arch.sca_lock);
2608         if (vcpu->kvm->arch.use_esca) {
2609                 struct esca_block *sca = vcpu->kvm->arch.sca;
2610
2611                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2612                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2613                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2614                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2615                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2616         } else {
2617                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2618
2619                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2620                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2621                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2622                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2623         }
2624         read_unlock(&vcpu->kvm->arch.sca_lock);
2625 }
2626
2627 /* Basic SCA to Extended SCA data copy routines */
2628 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2629 {
2630         d->sda = s->sda;
2631         d->sigp_ctrl.c = s->sigp_ctrl.c;
2632         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2633 }
2634
2635 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2636 {
2637         int i;
2638
2639         d->ipte_control = s->ipte_control;
2640         d->mcn[0] = s->mcn;
2641         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2642                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2643 }
2644
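/*
 * Replace the basic SCA of a VM with an extended SCA and rewire all
 * existing vcpus to it, so that more than KVM_S390_BSCA_CPU_SLOTS vcpus
 * can be added.
 */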
2645 static int sca_switch_to_extended(struct kvm *kvm)
2646 {
2647         struct bsca_block *old_sca = kvm->arch.sca;
2648         struct esca_block *new_sca;
2649         struct kvm_vcpu *vcpu;
2650         unsigned int vcpu_idx;
2651         u32 scaol, scaoh;
2652
2653         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2654         if (!new_sca)
2655                 return -ENOMEM;
2656
2657         scaoh = (u32)((u64)(new_sca) >> 32);
2658         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2659
2660         kvm_s390_vcpu_block_all(kvm);
2661         write_lock(&kvm->arch.sca_lock);
2662
2663         sca_copy_b_to_e(new_sca, old_sca);
2664
2665         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2666                 vcpu->arch.sie_block->scaoh = scaoh;
2667                 vcpu->arch.sie_block->scaol = scaol;
2668                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2669         }
2670         kvm->arch.sca = new_sca;
2671         kvm->arch.use_esca = 1;
2672
2673         write_unlock(&kvm->arch.sca_lock);
2674         kvm_s390_vcpu_unblock_all(kvm);
2675
2676         free_page((unsigned long)old_sca);
2677
2678         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2679                  old_sca, kvm->arch.sca);
2680         return 0;
2681 }
2682
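/*
 * Check whether a vcpu with the given id still fits into the SCA. If the
 * basic SCA is too small but the machine provides ESCA support, switch the
 * VM to the extended SCA on the fly.
 */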
2683 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2684 {
2685         int rc;
2686
2687         if (!kvm_s390_use_sca_entries()) {
2688                 if (id < KVM_MAX_VCPUS)
2689                         return true;
2690                 return false;
2691         }
2692         if (id < KVM_S390_BSCA_CPU_SLOTS)
2693                 return true;
2694         if (!sclp.has_esca || !sclp.has_64bscao)
2695                 return false;
2696
2697         mutex_lock(&kvm->lock);
2698         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2699         mutex_unlock(&kvm->lock);
2700
2701         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2702 }
2703
2704 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2705 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2706 {
2707         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2708         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2709         vcpu->arch.cputm_start = get_tod_clock_fast();
2710         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2711 }
2712
2713 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2714 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2715 {
2716         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2717         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2718         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2719         vcpu->arch.cputm_start = 0;
2720         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2721 }
2722
2723 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2724 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2725 {
2726         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2727         vcpu->arch.cputm_enabled = true;
2728         __start_cpu_timer_accounting(vcpu);
2729 }
2730
2731 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2732 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2733 {
2734         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2735         __stop_cpu_timer_accounting(vcpu);
2736         vcpu->arch.cputm_enabled = false;
2737 }
2738
2739 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2740 {
2741         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2742         __enable_cpu_timer_accounting(vcpu);
2743         preempt_enable();
2744 }
2745
2746 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2747 {
2748         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2749         __disable_cpu_timer_accounting(vcpu);
2750         preempt_enable();
2751 }
2752
2753 /* set the cpu timer - may only be called from the VCPU thread itself */
2754 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2755 {
2756         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2757         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2758         if (vcpu->arch.cputm_enabled)
2759                 vcpu->arch.cputm_start = get_tod_clock_fast();
2760         vcpu->arch.sie_block->cputm = cputm;
2761         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2762         preempt_enable();
2763 }
2764
2765 /* update and get the cpu timer - can also be called from other VCPU threads */
2766 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2767 {
2768         unsigned int seq;
2769         __u64 value;
2770
2771         if (unlikely(!vcpu->arch.cputm_enabled))
2772                 return vcpu->arch.sie_block->cputm;
2773
2774         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2775         do {
2776                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2777                 /*
2778                  * If the writer would ever execute a read in the critical
2779                  * section, e.g. in irq context, we have a deadlock.
2780                  */
2781                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2782                 value = vcpu->arch.sie_block->cputm;
2783                 /* if cputm_start is 0, accounting is being started/stopped */
2784                 if (likely(vcpu->arch.cputm_start))
2785                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2786         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2787         preempt_enable();
2788         return value;
2789 }
2790
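/*
 * Counterpart to kvm_arch_vcpu_put(): (re)enable the vcpu's gmap, mark the
 * vcpu as running and resume cpu timer accounting unless the vcpu is idle.
 */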
2791 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2792 {
2793
2794         gmap_enable(vcpu->arch.enabled_gmap);
2795         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2796         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2797                 __start_cpu_timer_accounting(vcpu);
2798         vcpu->cpu = cpu;
2799 }
2800
2801 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2802 {
2803         vcpu->cpu = -1;
2804         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2805                 __stop_cpu_timer_accounting(vcpu);
2806         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2807         vcpu->arch.enabled_gmap = gmap_get_enabled();
2808         gmap_disable(vcpu->arch.enabled_gmap);
2809
2810 }
2811
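/*
 * Finish vcpu creation once the vcpu fd exists: inherit the VM-wide TOD
 * epoch, hook the vcpu up to the VM gmap and its SCA entry (unless this is a
 * ucontrol VM) and intercept operation exceptions where facility 74 or
 * user_instr0 requires it.
 */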
2812 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2813 {
2814         mutex_lock(&vcpu->kvm->lock);
2815         preempt_disable();
2816         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2817         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2818         preempt_enable();
2819         mutex_unlock(&vcpu->kvm->lock);
2820         if (!kvm_is_ucontrol(vcpu->kvm)) {
2821                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2822                 sca_add_vcpu(vcpu);
2823         }
2824         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2825                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2826         /* make vcpu_load load the right gmap on the first trigger */
2827         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2828 }
2829
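/*
 * A PCKMO subfunction may only be exposed to the guest if it is both part of
 * the configured cpu model and actually provided by the host.
 */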
2830 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2831 {
2832         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2833             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2834                 return true;
2835         return false;
2836 }
2837
2838 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2839 {
2840         /* At least one ECC subfunction must be present */
2841         return kvm_has_pckmo_subfunc(kvm, 32) ||
2842                kvm_has_pckmo_subfunc(kvm, 33) ||
2843                kvm_has_pckmo_subfunc(kvm, 34) ||
2844                kvm_has_pckmo_subfunc(kvm, 40) ||
2845                kvm_has_pckmo_subfunc(kvm, 41);
2846
2847 }
2848
2849 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2850 {
2851         /*
2852          * If the AP instructions are not being interpreted and the MSAX3
2853          * facility is not configured for the guest, there is nothing to set up.
2854          */
2855         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2856                 return;
2857
2858         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2859         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2860         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2861         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2862
2863         if (vcpu->kvm->arch.crypto.apie)
2864                 vcpu->arch.sie_block->eca |= ECA_APIE;
2865
2866         /* Set up protected key support */
2867         if (vcpu->kvm->arch.crypto.aes_kw) {
2868                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2869                 /* ecc is also wrapped with AES key */
2870                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2871                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2872         }
2873
2874         if (vcpu->kvm->arch.crypto.dea_kw)
2875                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2876 }
2877
2878 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2879 {
2880         free_page(vcpu->arch.sie_block->cbrlo);
2881         vcpu->arch.sie_block->cbrlo = 0;
2882 }
2883
2884 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2885 {
2886         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2887         if (!vcpu->arch.sie_block->cbrlo)
2888                 return -ENOMEM;
2889         return 0;
2890 }
2891
2892 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2893 {
2894         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2895
2896         vcpu->arch.sie_block->ibc = model->ibc;
2897         if (test_kvm_facility(vcpu->kvm, 7))
2898                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2899 }
2900
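/*
 * Apply the cpu model and all facility/SCLP dependent settings to the SIE
 * control block of a freshly created vcpu: interception controls, the
 * ECB/ECA/ECD feature bits, CMMA, the clock comparator timer and the crypto
 * setup.
 */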
2901 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
2902 {
2903         int rc = 0;
2904
2905         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2906                                                     CPUSTAT_SM |
2907                                                     CPUSTAT_STOPPED);
2908
2909         if (test_kvm_facility(vcpu->kvm, 78))
2910                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2911         else if (test_kvm_facility(vcpu->kvm, 8))
2912                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2913
2914         kvm_s390_vcpu_setup_model(vcpu);
2915
2916         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2917         if (MACHINE_HAS_ESOP)
2918                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2919         if (test_kvm_facility(vcpu->kvm, 9))
2920                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2921         if (test_kvm_facility(vcpu->kvm, 73))
2922                 vcpu->arch.sie_block->ecb |= ECB_TE;
2923
2924         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2925                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2926         if (test_kvm_facility(vcpu->kvm, 130))
2927                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2928         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2929         if (sclp.has_cei)
2930                 vcpu->arch.sie_block->eca |= ECA_CEI;
2931         if (sclp.has_ib)
2932                 vcpu->arch.sie_block->eca |= ECA_IB;
2933         if (sclp.has_siif)
2934                 vcpu->arch.sie_block->eca |= ECA_SII;
2935         if (sclp.has_sigpif)
2936                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2937         if (test_kvm_facility(vcpu->kvm, 129)) {
2938                 vcpu->arch.sie_block->eca |= ECA_VX;
2939                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2940         }
2941         if (test_kvm_facility(vcpu->kvm, 139))
2942                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2943         if (test_kvm_facility(vcpu->kvm, 156))
2944                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2945         if (vcpu->arch.sie_block->gd) {
2946                 vcpu->arch.sie_block->eca |= ECA_AIV;
2947                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2948                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2949         }
2950         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2951                                         | SDNXC;
2952         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2953
2954         if (sclp.has_kss)
2955                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2956         else
2957                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2958
2959         if (vcpu->kvm->arch.use_cmma) {
2960                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2961                 if (rc)
2962                         return rc;
2963         }
2964         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2965         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2966
2967         vcpu->arch.sie_block->hpid = HPID_KVM;
2968
2969         kvm_s390_vcpu_crypto_setup(vcpu);
2970
2971         return rc;
2972 }
2973
2974 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
2975 {
2976         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2977                 return -EINVAL;
2978         return 0;
2979 }
2980
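/*
 * Allocate the SIE control block (a single page that also holds the ITDB and
 * the machine check save area), set up the sync-register layout advertised
 * to userspace and run the facility dependent vcpu setup.
 */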
2981 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
2982 {
2983         struct sie_page *sie_page;
2984         int rc;
2985
2986         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2987         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2988         if (!sie_page)
2989                 return -ENOMEM;
2990
2991         vcpu->arch.sie_block = &sie_page->sie_block;
2992         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2993
2994         /* the real guest size will always be smaller than msl */
2995         vcpu->arch.sie_block->mso = 0;
2996         vcpu->arch.sie_block->msl = sclp.hamax;
2997
2998         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
2999         spin_lock_init(&vcpu->arch.local_int.lock);
3000         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3001         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3002                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3003         seqcount_init(&vcpu->arch.cputm_seqcount);
3004
3005         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3006         kvm_clear_async_pf_completion_queue(vcpu);
3007         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3008                                     KVM_SYNC_GPRS |
3009                                     KVM_SYNC_ACRS |
3010                                     KVM_SYNC_CRS |
3011                                     KVM_SYNC_ARCH0 |
3012                                     KVM_SYNC_PFAULT;
3013         kvm_s390_set_prefix(vcpu, 0);
3014         if (test_kvm_facility(vcpu->kvm, 64))
3015                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3016         if (test_kvm_facility(vcpu->kvm, 82))
3017                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3018         if (test_kvm_facility(vcpu->kvm, 133))
3019                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3020         if (test_kvm_facility(vcpu->kvm, 156))
3021                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3022         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3023          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3024          */
3025         if (MACHINE_HAS_VX)
3026                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3027         else
3028                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3029
3030         if (kvm_is_ucontrol(vcpu->kvm)) {
3031                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3032                 if (rc)
3033                         goto out_free_sie_block;
3034         }
3035
3036         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3037                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3038         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3039
3040         rc = kvm_s390_vcpu_setup(vcpu);
3041         if (rc)
3042                 goto out_ucontrol_uninit;
3043         return 0;
3044
3045 out_ucontrol_uninit:
3046         if (kvm_is_ucontrol(vcpu->kvm))
3047                 gmap_remove(vcpu->arch.gmap);
3048 out_free_sie_block:
3049         free_page((unsigned long)(vcpu->arch.sie_block));
3050         return rc;
3051 }
3052
3053 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3054 {
3055         return kvm_s390_vcpu_has_irq(vcpu, 0);
3056 }
3057
3058 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3059 {
3060         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3061 }
3062
3063 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3064 {
3065         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3066         exit_sie(vcpu);
3067 }
3068
3069 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3070 {
3071         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3072 }
3073
3074 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3075 {
3076         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3077         exit_sie(vcpu);
3078 }
3079
3080 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3081 {
3082         return atomic_read(&vcpu->arch.sie_block->prog20) &
3083                (PROG_BLOCK_SIE | PROG_REQUEST);
3084 }
3085
3086 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3087 {
3088         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3089 }
3090
3091 /*
3092  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3093  * If the CPU is not running (e.g. waiting as idle) the function will
3094  * return immediately. */
3095 void exit_sie(struct kvm_vcpu *vcpu)
3096 {
3097         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3098         kvm_s390_vsie_kick(vcpu);
3099         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3100                 cpu_relax();
3101 }
3102
3103 /* Kick a guest cpu out of SIE to process a request synchronously */
3104 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3105 {
3106         kvm_make_request(req, vcpu);
3107         kvm_s390_vcpu_request(vcpu);
3108 }
3109
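/*
 * gmap notifier callback, invoked when host mappings of guest memory are
 * invalidated. If the range covers a vcpu's prefix pages, request an MMU
 * reload so that the ipte notification for the prefix is re-armed before
 * that vcpu re-enters SIE.
 */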
3110 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3111                               unsigned long end)
3112 {
3113         struct kvm *kvm = gmap->private;
3114         struct kvm_vcpu *vcpu;
3115         unsigned long prefix;
3116         int i;
3117
3118         if (gmap_is_shadow(gmap))
3119                 return;
3120         if (start >= 1UL << 31)
3121                 /* We are only interested in prefix pages */
3122                 return;
3123         kvm_for_each_vcpu(i, vcpu, kvm) {
3124                 /* match against both prefix pages */
3125                 prefix = kvm_s390_get_prefix(vcpu);
3126                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3127                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3128                                    start, end);
3129                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3130                 }
3131         }
3132 }
3133
3134 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3135 {
3136         /* do not poll with more than halt_poll_max_steal percent of steal time */
3137         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3138             halt_poll_max_steal) {
3139                 vcpu->stat.halt_no_poll_steal++;
3140                 return true;
3141         }
3142         return false;
3143 }
3144
3145 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3146 {
3147         /* kvm common code refers to this, but never calls it */
3148         BUG();
3149         return 0;
3150 }
3151
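/*
 * Read and write access to a selected set of SIE/arch registers via the
 * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. From userspace this is used
 * roughly as in the following sketch (variable names are the caller's
 * choice):
 *
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&value,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */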
3152 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3153                                            struct kvm_one_reg *reg)
3154 {
3155         int r = -EINVAL;
3156
3157         switch (reg->id) {
3158         case KVM_REG_S390_TODPR:
3159                 r = put_user(vcpu->arch.sie_block->todpr,
3160                              (u32 __user *)reg->addr);
3161                 break;
3162         case KVM_REG_S390_EPOCHDIFF:
3163                 r = put_user(vcpu->arch.sie_block->epoch,
3164                              (u64 __user *)reg->addr);
3165                 break;
3166         case KVM_REG_S390_CPU_TIMER:
3167                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3168                              (u64 __user *)reg->addr);
3169                 break;
3170         case KVM_REG_S390_CLOCK_COMP:
3171                 r = put_user(vcpu->arch.sie_block->ckc,
3172                              (u64 __user *)reg->addr);
3173                 break;
3174         case KVM_REG_S390_PFTOKEN:
3175                 r = put_user(vcpu->arch.pfault_token,
3176                              (u64 __user *)reg->addr);
3177                 break;
3178         case KVM_REG_S390_PFCOMPARE:
3179                 r = put_user(vcpu->arch.pfault_compare,
3180                              (u64 __user *)reg->addr);
3181                 break;
3182         case KVM_REG_S390_PFSELECT:
3183                 r = put_user(vcpu->arch.pfault_select,
3184                              (u64 __user *)reg->addr);
3185                 break;
3186         case KVM_REG_S390_PP:
3187                 r = put_user(vcpu->arch.sie_block->pp,
3188                              (u64 __user *)reg->addr);
3189                 break;
3190         case KVM_REG_S390_GBEA:
3191                 r = put_user(vcpu->arch.sie_block->gbea,
3192                              (u64 __user *)reg->addr);
3193                 break;
3194         default:
3195                 break;
3196         }
3197
3198         return r;
3199 }
3200
3201 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3202                                            struct kvm_one_reg *reg)
3203 {
3204         int r = -EINVAL;
3205         __u64 val;
3206
3207         switch (reg->id) {
3208         case KVM_REG_S390_TODPR:
3209                 r = get_user(vcpu->arch.sie_block->todpr,
3210                              (u32 __user *)reg->addr);
3211                 break;
3212         case KVM_REG_S390_EPOCHDIFF:
3213                 r = get_user(vcpu->arch.sie_block->epoch,
3214                              (u64 __user *)reg->addr);
3215                 break;
3216         case KVM_REG_S390_CPU_TIMER:
3217                 r = get_user(val, (u64 __user *)reg->addr);
3218                 if (!r)
3219                         kvm_s390_set_cpu_timer(vcpu, val);
3220                 break;
3221         case KVM_REG_S390_CLOCK_COMP:
3222                 r = get_user(vcpu->arch.sie_block->ckc,
3223                              (u64 __user *)reg->addr);
3224                 break;
3225         case KVM_REG_S390_PFTOKEN:
3226                 r = get_user(vcpu->arch.pfault_token,
3227                              (u64 __user *)reg->addr);
3228                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3229                         kvm_clear_async_pf_completion_queue(vcpu);
3230                 break;
3231         case KVM_REG_S390_PFCOMPARE:
3232                 r = get_user(vcpu->arch.pfault_compare,
3233                              (u64 __user *)reg->addr);
3234                 break;
3235         case KVM_REG_S390_PFSELECT:
3236                 r = get_user(vcpu->arch.pfault_select,
3237                              (u64 __user *)reg->addr);
3238                 break;
3239         case KVM_REG_S390_PP:
3240                 r = get_user(vcpu->arch.sie_block->pp,
3241                              (u64 __user *)reg->addr);
3242                 break;
3243         case KVM_REG_S390_GBEA:
3244                 r = get_user(vcpu->arch.sie_block->gbea,
3245                              (u64 __user *)reg->addr);
3246                 break;
3247         default:
3248                 break;
3249         }
3250
3251         return r;
3252 }
3253
3254 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3255 {
3256         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3257         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3258         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3259
3260         kvm_clear_async_pf_completion_queue(vcpu);
3261         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3262                 kvm_s390_vcpu_stop(vcpu);
3263         kvm_s390_clear_local_irqs(vcpu);
3264 }
3265
3266 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3267 {
3268         /* Initial reset is a superset of the normal reset */
3269         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3270
3271         /*
3272          * This equals the initial cpu reset in the POP, but we don't switch
3273          * to ESA. We reset not only the internal data, but also ...
3274          */
3275         vcpu->arch.sie_block->gpsw.mask = 0;
3276         vcpu->arch.sie_block->gpsw.addr = 0;
3277         kvm_s390_set_prefix(vcpu, 0);
3278         kvm_s390_set_cpu_timer(vcpu, 0);
3279         vcpu->arch.sie_block->ckc = 0;
3280         vcpu->arch.sie_block->todpr = 0;
3281         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3282         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3283         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3284
3285         /* ... the data in sync regs */
3286         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3287         vcpu->run->s.regs.ckc = 0;
3288         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3289         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3290         vcpu->run->psw_addr = 0;
3291         vcpu->run->psw_mask = 0;
3292         vcpu->run->s.regs.todpr = 0;
3293         vcpu->run->s.regs.cputm = 0;
3294         vcpu->run->s.regs.ckc = 0;
3295         vcpu->run->s.regs.pp = 0;
3296         vcpu->run->s.regs.gbea = 1;
3297         vcpu->run->s.regs.fpc = 0;
3298         vcpu->arch.sie_block->gbea = 1;
3299         vcpu->arch.sie_block->pp = 0;
3300         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3301 }
3302
3303 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3304 {
3305         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3306
3307         /* Clear reset is a superset of the initial reset */
3308         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3309
3310         memset(&regs->gprs, 0, sizeof(regs->gprs));
3311         memset(&regs->vrs, 0, sizeof(regs->vrs));
3312         memset(&regs->acrs, 0, sizeof(regs->acrs));
3313         memset(&regs->gscb, 0, sizeof(regs->gscb));
3314
3315         regs->etoken = 0;
3316         regs->etoken_extension = 0;
3317 }
3318
3319 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3320 {
3321         vcpu_load(vcpu);
3322         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3323         vcpu_put(vcpu);
3324         return 0;
3325 }
3326
3327 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3328 {
3329         vcpu_load(vcpu);
3330         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3331         vcpu_put(vcpu);
3332         return 0;
3333 }
3334
3335 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3336                                   struct kvm_sregs *sregs)
3337 {
3338         vcpu_load(vcpu);
3339
3340         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3341         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3342
3343         vcpu_put(vcpu);
3344         return 0;
3345 }
3346
3347 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3348                                   struct kvm_sregs *sregs)
3349 {
3350         vcpu_load(vcpu);
3351
3352         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3353         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3354
3355         vcpu_put(vcpu);
3356         return 0;
3357 }
3358
3359 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3360 {
3361         int ret = 0;
3362
3363         vcpu_load(vcpu);
3364
3365         if (test_fp_ctl(fpu->fpc)) {
3366                 ret = -EINVAL;
3367                 goto out;
3368         }
3369         vcpu->run->s.regs.fpc = fpu->fpc;
3370         if (MACHINE_HAS_VX)
3371                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3372                                  (freg_t *) fpu->fprs);
3373         else
3374                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3375
3376 out:
3377         vcpu_put(vcpu);
3378         return ret;
3379 }
3380
3381 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3382 {
3383         vcpu_load(vcpu);
3384
3385         /* make sure we have the latest values */
3386         save_fpu_regs();
3387         if (MACHINE_HAS_VX)
3388                 convert_vx_to_fp((freg_t *) fpu->fprs,
3389                                  (__vector128 *) vcpu->run->s.regs.vrs);
3390         else
3391                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3392         fpu->fpc = vcpu->run->s.regs.fpc;
3393
3394         vcpu_put(vcpu);
3395         return 0;
3396 }
3397
3398 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3399 {
3400         int rc = 0;
3401
3402         if (!is_vcpu_stopped(vcpu))
3403                 rc = -EBUSY;
3404         else {
3405                 vcpu->run->psw_mask = psw.mask;
3406                 vcpu->run->psw_addr = psw.addr;
3407         }
3408         return rc;
3409 }
3410
3411 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3412                                   struct kvm_translation *tr)
3413 {
3414         return -EINVAL; /* not implemented yet */
3415 }
3416
3417 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3418                               KVM_GUESTDBG_USE_HW_BP | \
3419                               KVM_GUESTDBG_ENABLE)
3420
3421 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3422                                         struct kvm_guest_debug *dbg)
3423 {
3424         int rc = 0;
3425
3426         vcpu_load(vcpu);
3427
3428         vcpu->guest_debug = 0;
3429         kvm_s390_clear_bp_data(vcpu);
3430
3431         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3432                 rc = -EINVAL;
3433                 goto out;
3434         }
3435         if (!sclp.has_gpere) {
3436                 rc = -EINVAL;
3437                 goto out;
3438         }
3439
3440         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3441                 vcpu->guest_debug = dbg->control;
3442                 /* enforce guest PER */
3443                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3444
3445                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3446                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3447         } else {
3448                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3449                 vcpu->arch.guestdbg.last_bp = 0;
3450         }
3451
3452         if (rc) {
3453                 vcpu->guest_debug = 0;
3454                 kvm_s390_clear_bp_data(vcpu);
3455                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3456         }
3457
3458 out:
3459         vcpu_put(vcpu);
3460         return rc;
3461 }
3462
3463 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3464                                     struct kvm_mp_state *mp_state)
3465 {
3466         int ret;
3467
3468         vcpu_load(vcpu);
3469
3470         /* CHECK_STOP and LOAD are not supported yet */
3471         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3472                                       KVM_MP_STATE_OPERATING;
3473
3474         vcpu_put(vcpu);
3475         return ret;
3476 }
3477
3478 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3479                                     struct kvm_mp_state *mp_state)
3480 {
3481         int rc = 0;
3482
3483         vcpu_load(vcpu);
3484
3485         /* user space knows about this interface - let it control the state */
3486         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3487
3488         switch (mp_state->mp_state) {
3489         case KVM_MP_STATE_STOPPED:
3490                 kvm_s390_vcpu_stop(vcpu);
3491                 break;
3492         case KVM_MP_STATE_OPERATING:
3493                 kvm_s390_vcpu_start(vcpu);
3494                 break;
3495         case KVM_MP_STATE_LOAD:
3496         case KVM_MP_STATE_CHECK_STOP:
3497                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3498         default:
3499                 rc = -ENXIO;
3500         }
3501
3502         vcpu_put(vcpu);
3503         return rc;
3504 }
3505
3506 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3507 {
3508         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3509 }
3510
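/*
 * Process all requests that were made while the vcpu was (potentially)
 * running. Returns 0 if the vcpu may (re)enter SIE, a negative error code
 * otherwise.
 */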
3511 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3512 {
3513 retry:
3514         kvm_s390_vcpu_request_handled(vcpu);
3515         if (!kvm_request_pending(vcpu))
3516                 return 0;
3517         /*
3518          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3519          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3520          * This ensures that the ipte instruction for this request has
3521          * already finished. We might race against a second unmapper that
3522          * wants to set the blocking bit. Let's just retry the request loop.
3523          */
3524         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3525                 int rc;
3526                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3527                                           kvm_s390_get_prefix(vcpu),
3528                                           PAGE_SIZE * 2, PROT_WRITE);
3529                 if (rc) {
3530                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3531                         return rc;
3532                 }
3533                 goto retry;
3534         }
3535
3536         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3537                 vcpu->arch.sie_block->ihcpu = 0xffff;
3538                 goto retry;
3539         }
3540
3541         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3542                 if (!ibs_enabled(vcpu)) {
3543                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3544                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3545                 }
3546                 goto retry;
3547         }
3548
3549         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3550                 if (ibs_enabled(vcpu)) {
3551                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3552                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3553                 }
3554                 goto retry;
3555         }
3556
3557         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3558                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3559                 goto retry;
3560         }
3561
3562         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3563                 /*
3564                  * Disable CMM virtualization; we will emulate the ESSA
3565                  * instruction manually, in order to provide additional
3566                  * functionalities needed for live migration.
3567                  */
3568                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3569                 goto retry;
3570         }
3571
3572         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3573                 /*
3574                  * Re-enable CMM virtualization if CMMA is available and
3575                  * CMM has been used.
3576                  */
3577                 if ((vcpu->kvm->arch.use_cmma) &&
3578                     (vcpu->kvm->mm->context.uses_cmm))
3579                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3580                 goto retry;
3581         }
3582
3583         /* nothing to do, just clear the request */
3584         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3585         /* we left the vsie handler, nothing to do, just clear the request */
3586         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3587
3588         return 0;
3589 }
3590
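/*
 * Set the guest TOD clock: the epoch (and, with the multiple-epoch facility,
 * the epoch index) is computed relative to the host TOD and propagated to
 * all vcpus while they are blocked, so that every vcpu observes the same
 * guest time.
 */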
3591 void kvm_s390_set_tod_clock(struct kvm *kvm,
3592                             const struct kvm_s390_vm_tod_clock *gtod)
3593 {
3594         struct kvm_vcpu *vcpu;
3595         struct kvm_s390_tod_clock_ext htod;
3596         int i;
3597
3598         mutex_lock(&kvm->lock);
3599         preempt_disable();
3600
3601         get_tod_clock_ext((char *)&htod);
3602
3603         kvm->arch.epoch = gtod->tod - htod.tod;
3604         kvm->arch.epdx = 0;
3605         if (test_kvm_facility(kvm, 139)) {
3606                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3607                 if (kvm->arch.epoch > gtod->tod)
3608                         kvm->arch.epdx -= 1;
3609         }
3610
3611         kvm_s390_vcpu_block_all(kvm);
3612         kvm_for_each_vcpu(i, vcpu, kvm) {
3613                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3614                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3615         }
3616
3617         kvm_s390_vcpu_unblock_all(kvm);
3618         preempt_enable();
3619         mutex_unlock(&kvm->lock);
3620 }
3621
3622 /**
3623  * kvm_arch_fault_in_page - fault-in guest page if necessary
3624  * @vcpu: The corresponding virtual cpu
3625  * @gpa: Guest physical address
3626  * @writable: Whether the page should be writable or not
3627  *
3628  * Make sure that a guest page has been faulted-in on the host.
3629  *
3630  * Return: Zero on success, negative error code otherwise.
3631  */
3632 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3633 {
3634         return gmap_fault(vcpu->arch.gmap, gpa,
3635                           writable ? FAULT_FLAG_WRITE : 0);
3636 }
3637
3638 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3639                                       unsigned long token)
3640 {
3641         struct kvm_s390_interrupt inti;
3642         struct kvm_s390_irq irq;
3643
3644         if (start_token) {
3645                 irq.u.ext.ext_params2 = token;
3646                 irq.type = KVM_S390_INT_PFAULT_INIT;
3647                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3648         } else {
3649                 inti.type = KVM_S390_INT_PFAULT_DONE;
3650                 inti.parm64 = token;
3651                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3652         }
3653 }
3654
3655 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3656                                      struct kvm_async_pf *work)
3657 {
3658         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3659         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3660 }
3661
3662 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3663                                  struct kvm_async_pf *work)
3664 {
3665         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3666         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3667 }
3668
3669 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3670                                struct kvm_async_pf *work)
3671 {
3672         /* s390 will always inject the page directly */
3673 }
3674
3675 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3676 {
3677         /*
3678          * s390 will always inject the page directly,
3679          * but we still want check_async_completion to clean up
3680          */
3681         return true;
3682 }
3683
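/*
 * Only set up an asynchronous pfault if the pfault handshake is enabled, the
 * current PSW matches the configured compare/select masks, external
 * interrupts (service-signal subclass) are enabled and no interrupt is
 * already pending; in all other cases the fault is resolved synchronously.
 */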
3684 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3685 {
3686         hva_t hva;
3687         struct kvm_arch_async_pf arch;
3688         int rc;
3689
3690         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3691                 return 0;
3692         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3693             vcpu->arch.pfault_compare)
3694                 return 0;
3695         if (psw_extint_disabled(vcpu))
3696                 return 0;
3697         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3698                 return 0;
3699         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3700                 return 0;
3701         if (!vcpu->arch.gmap->pfault_enabled)
3702                 return 0;
3703
3704         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3705         hva += current->thread.gmap_addr & ~PAGE_MASK;
3706         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3707                 return 0;
3708
3709         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3710         return rc;
3711 }
3712
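/*
 * Work to be done before entering SIE: process async pfault completions,
 * handle pending machine checks and reschedule requests, deliver pending
 * interrupts and vcpu requests, and patch the PER control blocks when guest
 * debugging is active.
 */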
3713 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3714 {
3715         int rc, cpuflags;
3716
3717         /*
3718          * On s390 notifications for arriving pages will be delivered directly
3719          * to the guest but the housekeeping for completed pfaults is
3720          * handled outside the worker.
3721          */
3722         kvm_check_async_pf_completion(vcpu);
3723
3724         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3725         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3726
3727         if (need_resched())
3728                 schedule();
3729
3730         if (test_cpu_flag(CIF_MCCK_PENDING))
3731                 s390_handle_mcck();
3732
3733         if (!kvm_is_ucontrol(vcpu->kvm)) {
3734                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3735                 if (rc)
3736                         return rc;
3737         }
3738
3739         rc = kvm_s390_handle_requests(vcpu);
3740         if (rc)
3741                 return rc;
3742
3743         if (guestdbg_enabled(vcpu)) {
3744                 kvm_s390_backup_guest_per_regs(vcpu);
3745                 kvm_s390_patch_guest_per_regs(vcpu);
3746         }
3747
3748         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3749
3750         vcpu->arch.sie_block->icptcode = 0;
3751         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3752         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3753         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3754
3755         return 0;
3756 }
3757
3758 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3759 {
3760         struct kvm_s390_pgm_info pgm_info = {
3761                 .code = PGM_ADDRESSING,
3762         };
3763         u8 opcode, ilen;
3764         int rc;
3765
3766         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3767         trace_kvm_s390_sie_fault(vcpu);
3768
3769         /*
3770          * We want to inject an addressing exception, which is defined as a
3771          * suppressing or terminating exception. However, since we came here
3772          * by a DAT access exception, the PSW still points to the faulting
3773          * instruction since DAT exceptions are nullifying. So we've got
3774          * to look up the current opcode to get the length of the instruction
3775          * to be able to forward the PSW.
3776          */
3777         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3778         ilen = insn_length(opcode);
3779         if (rc < 0) {
3780                 return rc;
3781         } else if (rc) {
3782                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3783                  * Forward by arbitrary ilc, injection will take care of
3784                  * nullification if necessary.
3785                  */
3786                 pgm_info = vcpu->arch.pgm;
3787                 ilen = 4;
3788         }
3789         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3790         kvm_s390_forward_psw(vcpu, ilen);
3791         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3792 }
3793
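/*
 * Evaluate why SIE was left: reinject host machine checks, dispatch
 * intercepts to the intercept handlers, report ucontrol translation
 * exceptions to userspace and resolve guest page faults (possibly
 * asynchronously) before the next SIE entry.
 */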
3794 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3795 {
3796         struct mcck_volatile_info *mcck_info;
3797         struct sie_page *sie_page;
3798
3799         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3800                    vcpu->arch.sie_block->icptcode);
3801         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3802
3803         if (guestdbg_enabled(vcpu))
3804                 kvm_s390_restore_guest_per_regs(vcpu);
3805
3806         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3807         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3808
3809         if (exit_reason == -EINTR) {
3810                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3811                 sie_page = container_of(vcpu->arch.sie_block,
3812                                         struct sie_page, sie_block);
3813                 mcck_info = &sie_page->mcck_info;
3814                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3815                 return 0;
3816         }
3817
3818         if (vcpu->arch.sie_block->icptcode > 0) {
3819                 int rc = kvm_handle_sie_intercept(vcpu);
3820
3821                 if (rc != -EOPNOTSUPP)
3822                         return rc;
3823                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3824                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3825                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3826                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3827                 return -EREMOTE;
3828         } else if (exit_reason != -EFAULT) {
3829                 vcpu->stat.exit_null++;
3830                 return 0;
3831         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3832                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3833                 vcpu->run->s390_ucontrol.trans_exc_code =
3834                                                 current->thread.gmap_addr;
3835                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3836                 return -EREMOTE;
3837         } else if (current->thread.gmap_pfault) {
3838                 trace_kvm_s390_major_guest_pfault(vcpu);
3839                 current->thread.gmap_pfault = 0;
3840                 if (kvm_arch_setup_async_pf(vcpu))
3841                         return 0;
3842                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3843         }
3844         return vcpu_post_run_fault_in_sie(vcpu);
3845 }
3846
3847 static int __vcpu_run(struct kvm_vcpu *vcpu)
3848 {
3849         int rc, exit_reason;
3850
3851         /*
3852          * We try to hold kvm->srcu during most of vcpu_run (except while
3853          * running the guest), so that memslots and other SRCU-protected data stay valid
3854          */
3855         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3856
3857         do {
3858                 rc = vcpu_pre_run(vcpu);
3859                 if (rc)
3860                         break;
3861
3862                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3863                 /*
3864                  * As PF_VCPU will be used in the fault handler, there must be
3865                  * no uaccess between guest_enter and guest_exit.
3866                  */
3867                 local_irq_disable();
3868                 guest_enter_irqoff();
3869                 __disable_cpu_timer_accounting(vcpu);
3870                 local_irq_enable();
3871                 exit_reason = sie64a(vcpu->arch.sie_block,
3872                                      vcpu->run->s.regs.gprs);
3873                 local_irq_disable();
3874                 __enable_cpu_timer_accounting(vcpu);
3875                 guest_exit_irqoff();
3876                 local_irq_enable();
3877                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3878
3879                 rc = vcpu_post_run(vcpu, exit_reason);
3880         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3881
3882         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3883         return rc;
3884 }
3885
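/*
 * Transfer the register state that userspace may have modified (kvm_run
 * sync regs) into the SIE control block and load the guest access, fpu and
 * guarded-storage registers, saving the corresponding host state for
 * store_regs().
 */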
3886 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3887 {
3888         struct runtime_instr_cb *riccb;
3889         struct gs_cb *gscb;
3890
3891         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3892         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3893         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3894         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3895         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3896                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3897         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3898                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3899                 /* some control register changes require a tlb flush */
3900                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3901         }
3902         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3903                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3904                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3905                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3906                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3907                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3908         }
3909         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3910                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3911                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3912                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3913                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3914                         kvm_clear_async_pf_completion_queue(vcpu);
3915         }
3916         /*
3917          * If userspace sets the riccb (e.g. after migration) to a valid state,
3918          * we should enable RI here instead of doing the lazy enablement.
3919          */
3920         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3921             test_kvm_facility(vcpu->kvm, 64) &&
3922             riccb->v &&
3923             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3924                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3925                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3926         }
3927         /*
3928          * If userspace sets the gscb (e.g. after migration) to non-zero,
3929          * we should enable GS here instead of doing the lazy enablement.
3930          */
3931         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3932             test_kvm_facility(vcpu->kvm, 133) &&
3933             gscb->gssm &&
3934             !vcpu->arch.gs_enabled) {
3935                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3936                 vcpu->arch.sie_block->ecb |= ECB_GS;
3937                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3938                 vcpu->arch.gs_enabled = 1;
3939         }
3940         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3941             test_kvm_facility(vcpu->kvm, 82)) {
3942                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3943                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3944         }
3945         save_access_regs(vcpu->arch.host_acrs);
3946         restore_access_regs(vcpu->run->s.regs.acrs);
3947         /* save host (userspace) fprs/vrs */
3948         save_fpu_regs();
3949         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3950         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3951         if (MACHINE_HAS_VX)
3952                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3953         else
3954                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3955         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3956         if (test_fp_ctl(current->thread.fpu.fpc))
3957                 /* User space provided an invalid FPC, let's clear it */
3958                 current->thread.fpu.fpc = 0;
3959         if (MACHINE_HAS_GS) {
3960                 preempt_disable();
3961                 __ctl_set_bit(2, 4);
3962                 if (current->thread.gs_cb) {
3963                         vcpu->arch.host_gscb = current->thread.gs_cb;
3964                         save_gs_cb(vcpu->arch.host_gscb);
3965                 }
3966                 if (vcpu->arch.gs_enabled) {
3967                         current->thread.gs_cb = (struct gs_cb *)
3968                                                 &vcpu->run->s.regs.gscb;
3969                         restore_gs_cb(current->thread.gs_cb);
3970                 }
3971                 preempt_enable();
3972         }
3973         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3974
3975         kvm_run->kvm_dirty_regs = 0;
3976 }
3977
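/*
 * Counterpart of sync_regs(): copy the guest register state back into
 * kvm_run for userspace and restore the host access, fpu and
 * guarded-storage registers.
 */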
3978 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3979 {
3980         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3981         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3982         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3983         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3984         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3985         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3986         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3987         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3988         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3989         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3990         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3991         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3992         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3993         save_access_regs(vcpu->run->s.regs.acrs);
3994         restore_access_regs(vcpu->arch.host_acrs);
3995         /* Save guest register state */
3996         save_fpu_regs();
3997         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3998         /* Restore will be done lazily at return */
3999         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4000         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4001         if (MACHINE_HAS_GS) {
4002                 __ctl_set_bit(2, 4);
4003                 if (vcpu->arch.gs_enabled)
4004                         save_gs_cb(current->thread.gs_cb);
4005                 preempt_disable();
4006                 current->thread.gs_cb = vcpu->arch.host_gscb;
4007                 restore_gs_cb(vcpu->arch.host_gscb);
4008                 preempt_enable();
4009                 if (!vcpu->arch.host_gscb)
4010                         __ctl_clear_bit(2, 4);
4011                 vcpu->arch.host_gscb = NULL;
4012         }
4013         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4014 }
4015
4016 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4017 {
4018         int rc;
4019
4020         if (kvm_run->immediate_exit)
4021                 return -EINTR;
4022
4023         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4024             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4025                 return -EINVAL;
4026
4027         vcpu_load(vcpu);
4028
4029         if (guestdbg_exit_pending(vcpu)) {
4030                 kvm_s390_prepare_debug_exit(vcpu);
4031                 rc = 0;
4032                 goto out;
4033         }
4034
4035         kvm_sigset_activate(vcpu);
4036
4037         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4038                 kvm_s390_vcpu_start(vcpu);
4039         } else if (is_vcpu_stopped(vcpu)) {
4040                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4041                                    vcpu->vcpu_id);
4042                 rc = -EINVAL;
4043                 goto out;
4044         }
4045
4046         sync_regs(vcpu, kvm_run);
4047         enable_cpu_timer_accounting(vcpu);
4048
4049         might_fault();
4050         rc = __vcpu_run(vcpu);
4051
4052         if (signal_pending(current) && !rc) {
4053                 kvm_run->exit_reason = KVM_EXIT_INTR;
4054                 rc = -EINTR;
4055         }
4056
4057         if (guestdbg_exit_pending(vcpu) && !rc)  {
4058                 kvm_s390_prepare_debug_exit(vcpu);
4059                 rc = 0;
4060         }
4061
4062         if (rc == -EREMOTE) {
4063                 /* userspace support is needed, kvm_run has been prepared */
4064                 rc = 0;
4065         }
4066
4067         disable_cpu_timer_accounting(vcpu);
4068         store_regs(vcpu, kvm_run);
4069
4070         kvm_sigset_deactivate(vcpu);
4071
4072         vcpu->stat.exit_userspace++;
4073 out:
4074         vcpu_put(vcpu);
4075         return rc;
4076 }
4077
4078 /*
4079  * store status at address
4080  * we have two special cases:
4081  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4082  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4083  */
4084 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4085 {
4086         unsigned char archmode = 1;
4087         freg_t fprs[NUM_FPRS];
4088         unsigned int px;
4089         u64 clkcomp, cputm;
4090         int rc;
4091
4092         px = kvm_s390_get_prefix(vcpu);
4093         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4094                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4095                         return -EFAULT;
4096                 gpa = 0;
4097         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4098                 if (write_guest_real(vcpu, 163, &archmode, 1))
4099                         return -EFAULT;
4100                 gpa = px;
4101         } else
4102                 gpa -= __LC_FPREGS_SAVE_AREA;
4103
4104         /* manually convert vector registers if necessary */
4105         if (MACHINE_HAS_VX) {
4106                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4107                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4108                                      fprs, 128);
4109         } else {
4110                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4111                                      vcpu->run->s.regs.fprs, 128);
4112         }
4113         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4114                               vcpu->run->s.regs.gprs, 128);
4115         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4116                               &vcpu->arch.sie_block->gpsw, 16);
4117         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4118                               &px, 4);
4119         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4120                               &vcpu->run->s.regs.fpc, 4);
4121         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4122                               &vcpu->arch.sie_block->todpr, 4);
4123         cputm = kvm_s390_get_cpu_timer(vcpu);
4124         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4125                               &cputm, 8);
4126         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4127         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4128                               &clkcomp, 8);
4129         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4130                               &vcpu->run->s.regs.acrs, 64);
4131         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4132                               &vcpu->arch.sie_block->gcr, 128);
4133         return rc ? -EFAULT : 0;
4134 }
4135
4136 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4137 {
4138         /*
4139          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4140          * switch in the run ioctl. Let's update our copies before we save
4141          * it into the save area
4142          */
4143         save_fpu_regs();
4144         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4145         save_access_regs(vcpu->run->s.regs.acrs);
4146
4147         return kvm_s390_store_status_unloaded(vcpu, addr);
4148 }
4149
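/*
 * IBS is only used while a single VCPU is started (see
 * kvm_s390_vcpu_start/stop below). The helpers below request enabling
 * or disabling it via the VCPU request mechanism; a still pending
 * request for the opposite state is cancelled first.
 */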
4150 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4151 {
4152         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4153         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4154 }
4155
4156 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4157 {
4158         unsigned int i;
4159         struct kvm_vcpu *vcpu;
4160
4161         kvm_for_each_vcpu(i, vcpu, kvm) {
4162                 __disable_ibs_on_vcpu(vcpu);
4163         }
4164 }
4165
4166 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4167 {
4168         if (!sclp.has_ibs)
4169                 return;
4170         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4171         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4172 }
4173
4174 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4175 {
4176         int i, online_vcpus, started_vcpus = 0;
4177
4178         if (!is_vcpu_stopped(vcpu))
4179                 return;
4180
4181         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4182         /* Only one cpu at a time may enter/leave the STOPPED state. */
4183         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4184         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4185
4186         for (i = 0; i < online_vcpus; i++) {
4187                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4188                         started_vcpus++;
4189         }
4190
4191         if (started_vcpus == 0) {
4192                 /* we're the only active VCPU -> speed it up */
4193                 __enable_ibs_on_vcpu(vcpu);
4194         } else if (started_vcpus == 1) {
4195                 /*
4196                  * As we are starting a second VCPU, we have to disable
4197                  * the IBS facility on all VCPUs to remove potentially
4198                  * outstanding ENABLE requests.
4199                  */
4200                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4201         }
4202
4203         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4204         /*
4205          * Another VCPU might have used IBS while we were offline.
4206          * Let's play safe and flush the VCPU at startup.
4207          */
4208         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4209         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4210         return;
4211 }
4212
4213 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4214 {
4215         int i, online_vcpus, started_vcpus = 0;
4216         struct kvm_vcpu *started_vcpu = NULL;
4217
4218         if (is_vcpu_stopped(vcpu))
4219                 return;
4220
4221         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4222         /* Only one cpu at a time may enter/leave the STOPPED state. */
4223         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4224         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4225
4226         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4227         kvm_s390_clear_stop_irq(vcpu);
4228
4229         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4230         __disable_ibs_on_vcpu(vcpu);
4231
4232         for (i = 0; i < online_vcpus; i++) {
4233                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4234                         started_vcpus++;
4235                         started_vcpu = vcpu->kvm->vcpus[i];
4236                 }
4237         }
4238
4239         if (started_vcpus == 1) {
4240                 /*
4241                  * As we only have one VCPU left, we want to enable the
4242                  * IBS facility for that VCPU to speed it up.
4243                  */
4244                 __enable_ibs_on_vcpu(started_vcpu);
4245         }
4246
4247         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4248         return;
4249 }
4250
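/*
 * Per-VCPU KVM_ENABLE_CAP handler. Only KVM_CAP_S390_CSS_SUPPORT is
 * known here; although enabled through a VCPU ioctl, it sets a VM-wide
 * flag (kvm->arch.css_support).
 */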
4251 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4252                                      struct kvm_enable_cap *cap)
4253 {
4254         int r;
4255
4256         if (cap->flags)
4257                 return -EINVAL;
4258
4259         switch (cap->cap) {
4260         case KVM_CAP_S390_CSS_SUPPORT:
4261                 if (!vcpu->kvm->arch.css_support) {
4262                         vcpu->kvm->arch.css_support = 1;
4263                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4264                         trace_kvm_s390_enable_css(vcpu->kvm);
4265                 }
4266                 r = 0;
4267                 break;
4268         default:
4269                 r = -EINVAL;
4270                 break;
4271         }
4272         return r;
4273 }
4274
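/*
 * KVM_S390_MEM_OP: access guest memory via logical addresses. Data is
 * copied through a temporary kernel buffer; with CHECK_ONLY the access
 * is only validated. A positive return code denotes an access
 * exception, which is injected into the guest if F_INJECT_EXCEPTION
 * was requested.
 */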
4275 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4276                                   struct kvm_s390_mem_op *mop)
4277 {
4278         void __user *uaddr = (void __user *)mop->buf;
4279         void *tmpbuf = NULL;
4280         int r, srcu_idx;
4281         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4282                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4283
4284         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4285                 return -EINVAL;
4286
4287         if (mop->size > MEM_OP_MAX_SIZE)
4288                 return -E2BIG;
4289
4290         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4291                 tmpbuf = vmalloc(mop->size);
4292                 if (!tmpbuf)
4293                         return -ENOMEM;
4294         }
4295
4296         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4297
4298         switch (mop->op) {
4299         case KVM_S390_MEMOP_LOGICAL_READ:
4300                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4301                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4302                                             mop->size, GACC_FETCH);
4303                         break;
4304                 }
4305                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4306                 if (r == 0) {
4307                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4308                                 r = -EFAULT;
4309                 }
4310                 break;
4311         case KVM_S390_MEMOP_LOGICAL_WRITE:
4312                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4313                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4314                                             mop->size, GACC_STORE);
4315                         break;
4316                 }
4317                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4318                         r = -EFAULT;
4319                         break;
4320                 }
4321                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4322                 break;
4323         default:
4324                 r = -EINVAL;
4325         }
4326
4327         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4328
4329         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4330                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4331
4332         vfree(tmpbuf);
4333         return r;
4334 }
4335
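/*
 * Interrupt injection ioctls, handled on the async ioctl path. The
 * legacy KVM_S390_INTERRUPT format is converted to a struct
 * kvm_s390_irq before injection.
 */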
4336 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4337                                unsigned int ioctl, unsigned long arg)
4338 {
4339         struct kvm_vcpu *vcpu = filp->private_data;
4340         void __user *argp = (void __user *)arg;
4341
4342         switch (ioctl) {
4343         case KVM_S390_IRQ: {
4344                 struct kvm_s390_irq s390irq;
4345
4346                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4347                         return -EFAULT;
4348                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4349         }
4350         case KVM_S390_INTERRUPT: {
4351                 struct kvm_s390_interrupt s390int;
4352                 struct kvm_s390_irq s390irq = {};
4353
4354                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4355                         return -EFAULT;
4356                 if (s390int_to_s390irq(&s390int, &s390irq))
4357                         return -EINVAL;
4358                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4359         }
4360         }
4361         return -ENOIOCTLCMD;
4362 }
4363
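/*
 * The remaining VCPU ioctls are handled here, with the VCPU loaded
 * (vcpu_load/vcpu_put around the switch).
 */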
4364 long kvm_arch_vcpu_ioctl(struct file *filp,
4365                          unsigned int ioctl, unsigned long arg)
4366 {
4367         struct kvm_vcpu *vcpu = filp->private_data;
4368         void __user *argp = (void __user *)arg;
4369         int idx;
4370         long r;
4371
4372         vcpu_load(vcpu);
4373
4374         switch (ioctl) {
4375         case KVM_S390_STORE_STATUS:
4376                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4377                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4378                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4379                 break;
4380         case KVM_S390_SET_INITIAL_PSW: {
4381                 psw_t psw;
4382
4383                 r = -EFAULT;
4384                 if (copy_from_user(&psw, argp, sizeof(psw)))
4385                         break;
4386                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4387                 break;
4388         }
4389         case KVM_S390_CLEAR_RESET:
4390                 r = 0;
4391                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4392                 break;
4393         case KVM_S390_INITIAL_RESET:
4394                 r = 0;
4395                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4396                 break;
4397         case KVM_S390_NORMAL_RESET:
4398                 r = 0;
4399                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4400                 break;
4401         case KVM_SET_ONE_REG:
4402         case KVM_GET_ONE_REG: {
4403                 struct kvm_one_reg reg;
4404                 r = -EFAULT;
4405                 if (copy_from_user(&reg, argp, sizeof(reg)))
4406                         break;
4407                 if (ioctl == KVM_SET_ONE_REG)
4408                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4409                 else
4410                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4411                 break;
4412         }
4413 #ifdef CONFIG_KVM_S390_UCONTROL
4414         case KVM_S390_UCAS_MAP: {
4415                 struct kvm_s390_ucas_mapping ucasmap;
4416
4417                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4418                         r = -EFAULT;
4419                         break;
4420                 }
4421
4422                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4423                         r = -EINVAL;
4424                         break;
4425                 }
4426
4427                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4428                                      ucasmap.vcpu_addr, ucasmap.length);
4429                 break;
4430         }
4431         case KVM_S390_UCAS_UNMAP: {
4432                 struct kvm_s390_ucas_mapping ucasmap;
4433
4434                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4435                         r = -EFAULT;
4436                         break;
4437                 }
4438
4439                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4440                         r = -EINVAL;
4441                         break;
4442                 }
4443
4444                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4445                         ucasmap.length);
4446                 break;
4447         }
4448 #endif
4449         case KVM_S390_VCPU_FAULT: {
4450                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4451                 break;
4452         }
4453         case KVM_ENABLE_CAP:
4454         {
4455                 struct kvm_enable_cap cap;
4456                 r = -EFAULT;
4457                 if (copy_from_user(&cap, argp, sizeof(cap)))
4458                         break;
4459                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4460                 break;
4461         }
4462         case KVM_S390_MEM_OP: {
4463                 struct kvm_s390_mem_op mem_op;
4464
4465                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4466                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4467                 else
4468                         r = -EFAULT;
4469                 break;
4470         }
4471         case KVM_S390_SET_IRQ_STATE: {
4472                 struct kvm_s390_irq_state irq_state;
4473
4474                 r = -EFAULT;
4475                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4476                         break;
4477                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4478                     irq_state.len == 0 ||
4479                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4480                         r = -EINVAL;
4481                         break;
4482                 }
4483                 /* do not use irq_state.flags, it will break old QEMUs */
4484                 r = kvm_s390_set_irq_state(vcpu,
4485                                            (void __user *) irq_state.buf,
4486                                            irq_state.len);
4487                 break;
4488         }
4489         case KVM_S390_GET_IRQ_STATE: {
4490                 struct kvm_s390_irq_state irq_state;
4491
4492                 r = -EFAULT;
4493                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4494                         break;
4495                 if (irq_state.len == 0) {
4496                         r = -EINVAL;
4497                         break;
4498                 }
4499                 /* do not use irq_state.flags, it will break old QEMUs */
4500                 r = kvm_s390_get_irq_state(vcpu,
4501                                            (__u8 __user *)  irq_state.buf,
4502                                            irq_state.len);
4503                 break;
4504         }
4505         default:
4506                 r = -ENOTTY;
4507         }
4508
4509         vcpu_put(vcpu);
4510         return r;
4511 }
4512
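/*
 * For user controlled virtual machines, mmap() of the VCPU fd at
 * KVM_S390_SIE_PAGE_OFFSET exposes the SIE control block to userspace;
 * everything else gets SIGBUS.
 */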
4513 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4514 {
4515 #ifdef CONFIG_KVM_S390_UCONTROL
4516         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4517                  && (kvm_is_ucontrol(vcpu->kvm))) {
4518                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4519                 get_page(vmf->page);
4520                 return 0;
4521         }
4522 #endif
4523         return VM_FAULT_SIGBUS;
4524 }
4525
4526 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4527                             unsigned long npages)
4528 {
4529         return 0;
4530 }
4531
4532 /* Section: memory related */
4533 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4534                                    struct kvm_memory_slot *memslot,
4535                                    const struct kvm_userspace_memory_region *mem,
4536                                    enum kvm_mr_change change)
4537 {
4538         /* A few sanity checks. Memory slots have to start and end at a
4539            segment boundary (1 MB). The memory backing them in userland may
4540            be fragmented into various different vmas, and it is okay to mmap()
4541            and munmap() in this slot at any time after this call. */
4542
4543         if (mem->userspace_addr & 0xffffful)
4544                 return -EINVAL;
4545
4546         if (mem->memory_size & 0xffffful)
4547                 return -EINVAL;
4548
4549         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4550                 return -EINVAL;
4551
4552         return 0;
4553 }
4554
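/*
 * Apply a committed memslot change to the guest address space (gmap):
 * unmap the old range on DELETE/MOVE and (re)map the new range on
 * MOVE/CREATE; FLAGS_ONLY changes need no gmap update.
 */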
4555 void kvm_arch_commit_memory_region(struct kvm *kvm,
4556                                 const struct kvm_userspace_memory_region *mem,
4557                                 const struct kvm_memory_slot *old,
4558                                 const struct kvm_memory_slot *new,
4559                                 enum kvm_mr_change change)
4560 {
4561         int rc = 0;
4562
4563         switch (change) {
4564         case KVM_MR_DELETE:
4565                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4566                                         old->npages * PAGE_SIZE);
4567                 break;
4568         case KVM_MR_MOVE:
4569                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4570                                         old->npages * PAGE_SIZE);
4571                 if (rc)
4572                         break;
4573                 /* FALLTHROUGH */
4574         case KVM_MR_CREATE:
4575                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4576                                       mem->guest_phys_addr, mem->memory_size);
4577                 break;
4578         case KVM_MR_FLAGS_ONLY:
4579                 break;
4580         default:
4581                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4582         }
4583         if (rc)
4584                 pr_warn("failed to commit memory region\n");
4585         return;
4586 }
4587
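/*
 * Derive, from the SCLP-reported hmfai value, a mask of the facility
 * bits in STFLE doubleword @i that are not hypervisor managed and may
 * therefore be offered to guests (see kvm_s390_init() below).
 */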
4588 static inline unsigned long nonhyp_mask(int i)
4589 {
4590         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4591
4592         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4593 }
4594
4595 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4596 {
4597         vcpu->valid_wakeup = false;
4598 }
4599
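/*
 * Module initialization: SIE (sief2) must be available, nested
 * virtualization and huge page backing are mutually exclusive, and the
 * host facility list (filtered by nonhyp_mask()) seeds the base
 * facility mask offered to guests before registering with the KVM core.
 */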
4600 static int __init kvm_s390_init(void)
4601 {
4602         int i;
4603
4604         if (!sclp.has_sief2) {
4605                 pr_info("SIE is not available\n");
4606                 return -ENODEV;
4607         }
4608
4609         if (nested && hpage) {
4610                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4611                 return -EINVAL;
4612         }
4613
4614         for (i = 0; i < 16; i++)
4615                 kvm_s390_fac_base[i] |=
4616                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4617
4618         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4619 }
4620
4621 static void __exit kvm_s390_exit(void)
4622 {
4623         kvm_exit();
4624 }
4625
4626 module_init(kvm_s390_init);
4627 module_exit(kvm_s390_exit);
4628
4629 /*
4630  * Enable autoloading of the kvm module.
4631  * Note that we add the module alias here instead of in virt/kvm/kvm_main.c
4632  * since x86 takes a different approach.
4633  */
4634 #include <linux/miscdevice.h>
4635 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4636 MODULE_ALIAS("devname:kvm");