KVM: X86: Declare KVM_CAP_SET_GUEST_DEBUG properly
arch/s390/kvm/kvm-s390.c (linux-2.6-microblaze.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include "kvm-s390.h"
49 #include "gaccess.h"
50
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54
55 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
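/*
 * Worst-case buffer size for KVM_S390_{GET,SET}_IRQ_STATE: one pending
 * emergency signal from every vCPU plus the other types of local
 * interrupts.
 */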
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58                            (KVM_MAX_VCPUS + LOCAL_IRQS))
59
60 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
61 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
62
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64         { "userspace_handled", VCPU_STAT(exit_userspace) },
65         { "exit_null", VCPU_STAT(exit_null) },
66         { "exit_validity", VCPU_STAT(exit_validity) },
67         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
68         { "exit_external_request", VCPU_STAT(exit_external_request) },
69         { "exit_io_request", VCPU_STAT(exit_io_request) },
70         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71         { "exit_instruction", VCPU_STAT(exit_instruction) },
72         { "exit_pei", VCPU_STAT(exit_pei) },
73         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
80         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
81         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
82         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
83         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
84         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
85         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
86         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
87         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
88         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
89         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
90         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
91         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
92         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
93         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
94         { "deliver_program", VCPU_STAT(deliver_program) },
95         { "deliver_io", VCPU_STAT(deliver_io) },
96         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
97         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
98         { "inject_ckc", VCPU_STAT(inject_ckc) },
99         { "inject_cputm", VCPU_STAT(inject_cputm) },
100         { "inject_external_call", VCPU_STAT(inject_external_call) },
101         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
102         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
103         { "inject_io", VM_STAT(inject_io) },
104         { "inject_mchk", VCPU_STAT(inject_mchk) },
105         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
106         { "inject_program", VCPU_STAT(inject_program) },
107         { "inject_restart", VCPU_STAT(inject_restart) },
108         { "inject_service_signal", VM_STAT(inject_service_signal) },
109         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
110         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
111         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
112         { "inject_virtio", VM_STAT(inject_virtio) },
113         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
114         { "instruction_gs", VCPU_STAT(instruction_gs) },
115         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
116         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
117         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
118         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
119         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
120         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
121         { "instruction_sck", VCPU_STAT(instruction_sck) },
122         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
123         { "instruction_spx", VCPU_STAT(instruction_spx) },
124         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
125         { "instruction_stap", VCPU_STAT(instruction_stap) },
126         { "instruction_iske", VCPU_STAT(instruction_iske) },
127         { "instruction_ri", VCPU_STAT(instruction_ri) },
128         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
129         { "instruction_sske", VCPU_STAT(instruction_sske) },
130         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
131         { "instruction_essa", VCPU_STAT(instruction_essa) },
132         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
133         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
134         { "instruction_tb", VCPU_STAT(instruction_tb) },
135         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
136         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
137         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
138         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
139         { "instruction_sie", VCPU_STAT(instruction_sie) },
140         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
141         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
142         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
143         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
144         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
145         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
146         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
147         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
148         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
149         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
150         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
151         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
152         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
153         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
154         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
155         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
156         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
157         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
158         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
159         { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
160         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
161         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
162         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
163         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
164         { NULL }
165 };
166
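/* layout of the 16-byte value stored by STORE CLOCK EXTENDED (STCKE) */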
167 struct kvm_s390_tod_clock_ext {
168         __u8 epoch_idx;
169         __u64 tod;
170         __u8 reserved[7];
171 } __packed;
172
173 /* allow nested virtualization in KVM (if enabled by user space) */
174 static int nested;
175 module_param(nested, int, S_IRUGO);
176 MODULE_PARM_DESC(nested, "Nested virtualization support");
177
178 /* allow 1m huge page guest backing, if !nested */
179 static int hpage;
180 module_param(hpage, int, 0444);
181 MODULE_PARM_DESC(hpage, "1m huge page backing support");
182
183 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
184 static u8 halt_poll_max_steal = 10;
185 module_param(halt_poll_max_steal, byte, 0644);
186 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
187
188 /* if set to true, the GISA will be initialized and used if available */
189 static bool use_gisa = true;
190 module_param(use_gisa, bool, 0644);
191 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
192
193 /*
194  * For now we handle at most 16 double words as this is what the s390 base
195  * kernel handles and stores in the prefix page. If we ever need to go beyond
196  * this, it will require code changes, but the external uapi can stay.
197  */
198 #define SIZE_INTERNAL 16
199
200 /*
201  * Base feature mask that defines default mask for facilities. Consists of the
202  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
203  */
204 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
205 /*
206  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
207  * and defines the facilities that can be enabled via a cpu model.
208  */
209 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
210
211 static unsigned long kvm_s390_fac_size(void)
212 {
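        /*
         * SIZE_INTERNAL must fit both the architected mask/list sizes and
         * the stfle area in the lowcore.
         */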
213         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
214         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
215         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
216                 sizeof(S390_lowcore.stfle_fac_list));
217
218         return SIZE_INTERNAL;
219 }
220
221 /* available cpu features supported by kvm */
222 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
223 /* available subfunctions indicated via query / "test bit" */
224 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
225
226 static struct gmap_notifier gmap_notifier;
227 static struct gmap_notifier vsie_gmap_notifier;
228 debug_info_t *kvm_s390_dbf;
229 debug_info_t *kvm_s390_dbf_uv;
230
231 /* Section: not file related */
232 int kvm_arch_hardware_enable(void)
233 {
234         /* every s390 is virtualization enabled ;-) */
235         return 0;
236 }
237
238 int kvm_arch_check_processor_compat(void *opaque)
239 {
240         return 0;
241 }
242
243 /* forward declarations */
244 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
245                               unsigned long end);
246 static int sca_switch_to_extended(struct kvm *kvm);
247
248 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
249 {
250         u8 delta_idx = 0;
251
252         /*
253          * The TOD jumps by delta; we compensate for this by adding
254          * -delta to the epoch.
255          */
256         delta = -delta;
257
258         /* sign-extension - we're adding to signed values below */
259         if ((s64)delta < 0)
260                 delta_idx = -1;
261
262         scb->epoch += delta;
263         if (scb->ecd & ECD_MEF) {
264                 scb->epdx += delta_idx;
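                /* propagate a carry of the 64-bit epoch addition into the epoch index */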
265                 if (scb->epoch < delta)
266                         scb->epdx += 1;
267         }
268 }
269
270 /*
271  * This callback is executed during stop_machine(). All CPUs are therefore
272  * temporarily stopped. In order not to change guest behavior, we have to
273  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
274  * so a CPU won't be stopped while calculating with the epoch.
275  */
276 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
277                           void *v)
278 {
279         struct kvm *kvm;
280         struct kvm_vcpu *vcpu;
281         int i;
282         unsigned long long *delta = v;
283
284         list_for_each_entry(kvm, &vm_list, vm_list) {
285                 kvm_for_each_vcpu(i, vcpu, kvm) {
286                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
287                         if (i == 0) {
288                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
289                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
290                         }
291                         if (vcpu->arch.cputm_enabled)
292                                 vcpu->arch.cputm_start += *delta;
293                         if (vcpu->arch.vsie_block)
294                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
295                                                    *delta);
296                 }
297         }
298         return NOTIFY_OK;
299 }
300
301 static struct notifier_block kvm_clock_notifier = {
302         .notifier_call = kvm_clock_sync,
303 };
304
305 int kvm_arch_hardware_setup(void *opaque)
306 {
307         gmap_notifier.notifier_call = kvm_gmap_notifier;
308         gmap_register_pte_notifier(&gmap_notifier);
309         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
310         gmap_register_pte_notifier(&vsie_gmap_notifier);
311         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
312                                        &kvm_clock_notifier);
313         return 0;
314 }
315
316 void kvm_arch_hardware_unsetup(void)
317 {
318         gmap_unregister_pte_notifier(&gmap_notifier);
319         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
320         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
321                                          &kvm_clock_notifier);
322 }
323
324 static void allow_cpu_feat(unsigned long nr)
325 {
326         set_bit_inv(nr, kvm_s390_available_cpu_feat);
327 }
328
329 static inline int plo_test_bit(unsigned char nr)
330 {
331         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
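        /* 0x100 is the "test" bit: PLO merely tests whether function code nr is installed */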
332         int cc;
333
334         asm volatile(
335                 /* Parameter registers are ignored for "test bit" */
336                 "       plo     0,0,0,0(0)\n"
337                 "       ipm     %0\n"
338                 "       srl     %0,28\n"
339                 : "=d" (cc)
340                 : "d" (r0)
341                 : "cc");
342         return cc == 0;
343 }
344
345 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
346 {
347         register unsigned long r0 asm("0") = 0; /* query function */
348         register unsigned long r1 asm("1") = (unsigned long) query;
349
350         asm volatile(
351                 /* Parameter regs are ignored */
352                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
353                 :
354                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
355                 : "cc", "memory");
356 }
357
358 #define INSN_SORTL 0xb938
359 #define INSN_DFLTCC 0xb939
360
361 static void kvm_s390_cpu_feat_init(void)
362 {
363         int i;
364
365         for (i = 0; i < 256; ++i) {
366                 if (plo_test_bit(i))
367                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
368         }
369
370         if (test_facility(28)) /* TOD-clock steering */
371                 ptff(kvm_s390_available_subfunc.ptff,
372                      sizeof(kvm_s390_available_subfunc.ptff),
373                      PTFF_QAF);
374
375         if (test_facility(17)) { /* MSA */
376                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
377                               kvm_s390_available_subfunc.kmac);
378                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
379                               kvm_s390_available_subfunc.kmc);
380                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
381                               kvm_s390_available_subfunc.km);
382                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
383                               kvm_s390_available_subfunc.kimd);
384                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
385                               kvm_s390_available_subfunc.klmd);
386         }
387         if (test_facility(76)) /* MSA3 */
388                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
389                               kvm_s390_available_subfunc.pckmo);
390         if (test_facility(77)) { /* MSA4 */
391                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
392                               kvm_s390_available_subfunc.kmctr);
393                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
394                               kvm_s390_available_subfunc.kmf);
395                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
396                               kvm_s390_available_subfunc.kmo);
397                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
398                               kvm_s390_available_subfunc.pcc);
399         }
400         if (test_facility(57)) /* MSA5 */
401                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
402                               kvm_s390_available_subfunc.ppno);
403
404         if (test_facility(146)) /* MSA8 */
405                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
406                               kvm_s390_available_subfunc.kma);
407
408         if (test_facility(155)) /* MSA9 */
409                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
410                               kvm_s390_available_subfunc.kdsa);
411
412         if (test_facility(150)) /* SORTL */
413                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
414
415         if (test_facility(151)) /* DFLTCC */
416                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
417
418         if (MACHINE_HAS_ESOP)
419                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
420         /*
421          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
422          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
423          */
424         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
425             !test_facility(3) || !nested)
426                 return;
427         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
428         if (sclp.has_64bscao)
429                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
430         if (sclp.has_siif)
431                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
432         if (sclp.has_gpere)
433                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
434         if (sclp.has_gsls)
435                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
436         if (sclp.has_ib)
437                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
438         if (sclp.has_cei)
439                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
440         if (sclp.has_ibs)
441                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
442         if (sclp.has_kss)
443                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
444         /*
445          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
446          * all skey handling functions read/set the skey from the PGSTE
447          * instead of the real storage key.
448          *
449          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
450          * resident pages to be detected as preserved.
451          *
452          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
453          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
454          *
455          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
456          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
457          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
458          *
459          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
460          * cannot easily shadow the SCA because of the ipte lock.
461          */
462 }
463
464 int kvm_arch_init(void *opaque)
465 {
466         int rc = -ENOMEM;
467
468         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
469         if (!kvm_s390_dbf)
470                 return -ENOMEM;
471
472         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
473         if (!kvm_s390_dbf_uv)
474                 goto out;
475
476         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
477             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
478                 goto out;
479
480         kvm_s390_cpu_feat_init();
481
482         /* Register floating interrupt controller interface. */
483         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
484         if (rc) {
485                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
486                 goto out;
487         }
488
489         rc = kvm_s390_gib_init(GAL_ISC);
490         if (rc)
491                 goto out;
492
493         return 0;
494
495 out:
496         kvm_arch_exit();
497         return rc;
498 }
499
500 void kvm_arch_exit(void)
501 {
502         kvm_s390_gib_destroy();
503         debug_unregister(kvm_s390_dbf);
504         debug_unregister(kvm_s390_dbf_uv);
505 }
506
507 /* Section: device related */
508 long kvm_arch_dev_ioctl(struct file *filp,
509                         unsigned int ioctl, unsigned long arg)
510 {
511         if (ioctl == KVM_S390_ENABLE_SIE)
512                 return s390_enable_sie();
513         return -EINVAL;
514 }
515
516 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
517 {
518         int r;
519
520         switch (ext) {
521         case KVM_CAP_S390_PSW:
522         case KVM_CAP_S390_GMAP:
523         case KVM_CAP_SYNC_MMU:
524 #ifdef CONFIG_KVM_S390_UCONTROL
525         case KVM_CAP_S390_UCONTROL:
526 #endif
527         case KVM_CAP_ASYNC_PF:
528         case KVM_CAP_SYNC_REGS:
529         case KVM_CAP_ONE_REG:
530         case KVM_CAP_ENABLE_CAP:
531         case KVM_CAP_S390_CSS_SUPPORT:
532         case KVM_CAP_IOEVENTFD:
533         case KVM_CAP_DEVICE_CTRL:
534         case KVM_CAP_S390_IRQCHIP:
535         case KVM_CAP_VM_ATTRIBUTES:
536         case KVM_CAP_MP_STATE:
537         case KVM_CAP_IMMEDIATE_EXIT:
538         case KVM_CAP_S390_INJECT_IRQ:
539         case KVM_CAP_S390_USER_SIGP:
540         case KVM_CAP_S390_USER_STSI:
541         case KVM_CAP_S390_SKEYS:
542         case KVM_CAP_S390_IRQ_STATE:
543         case KVM_CAP_S390_USER_INSTR0:
544         case KVM_CAP_S390_CMMA_MIGRATION:
545         case KVM_CAP_S390_AIS:
546         case KVM_CAP_S390_AIS_MIGRATION:
547         case KVM_CAP_S390_VCPU_RESETS:
548         case KVM_CAP_SET_GUEST_DEBUG:
549                 r = 1;
550                 break;
551         case KVM_CAP_S390_HPAGE_1M:
552                 r = 0;
553                 if (hpage && !kvm_is_ucontrol(kvm))
554                         r = 1;
555                 break;
556         case KVM_CAP_S390_MEM_OP:
557                 r = MEM_OP_MAX_SIZE;
558                 break;
559         case KVM_CAP_NR_VCPUS:
560         case KVM_CAP_MAX_VCPUS:
561         case KVM_CAP_MAX_VCPU_ID:
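                /*
                 * The vCPU limit depends on the SCA: 64 entries with the basic
                 * SCA, more with the extended SCA, and no SCA-imposed limit if
                 * SCA entries are not used at all.
                 */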
562                 r = KVM_S390_BSCA_CPU_SLOTS;
563                 if (!kvm_s390_use_sca_entries())
564                         r = KVM_MAX_VCPUS;
565                 else if (sclp.has_esca && sclp.has_64bscao)
566                         r = KVM_S390_ESCA_CPU_SLOTS;
567                 break;
568         case KVM_CAP_S390_COW:
569                 r = MACHINE_HAS_ESOP;
570                 break;
571         case KVM_CAP_S390_VECTOR_REGISTERS:
572                 r = MACHINE_HAS_VX;
573                 break;
574         case KVM_CAP_S390_RI:
575                 r = test_facility(64);
576                 break;
577         case KVM_CAP_S390_GS:
578                 r = test_facility(133);
579                 break;
580         case KVM_CAP_S390_BPB:
581                 r = test_facility(82);
582                 break;
583         case KVM_CAP_S390_PROTECTED:
584                 r = is_prot_virt_host();
585                 break;
586         default:
587                 r = 0;
588         }
589         return r;
590 }
591
592 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
593 {
594         int i;
595         gfn_t cur_gfn, last_gfn;
596         unsigned long gaddr, vmaddr;
597         struct gmap *gmap = kvm->arch.gmap;
598         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
599
600         /* Loop over all guest segments */
601         cur_gfn = memslot->base_gfn;
602         last_gfn = memslot->base_gfn + memslot->npages;
603         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
604                 gaddr = gfn_to_gpa(cur_gfn);
605                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
606                 if (kvm_is_error_hva(vmaddr))
607                         continue;
608
609                 bitmap_zero(bitmap, _PAGE_ENTRIES);
610                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
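                /* the bitmap now holds one bit per dirty 4k page within this segment */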
611                 for (i = 0; i < _PAGE_ENTRIES; i++) {
612                         if (test_bit(i, bitmap))
613                                 mark_page_dirty(kvm, cur_gfn + i);
614                 }
615
616                 if (fatal_signal_pending(current))
617                         return;
618                 cond_resched();
619         }
620 }
621
622 /* Section: vm related */
623 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
624
625 /*
626  * Get (and clear) the dirty memory log for a memory slot.
627  */
628 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
629                                struct kvm_dirty_log *log)
630 {
631         int r;
632         unsigned long n;
633         struct kvm_memory_slot *memslot;
634         int is_dirty;
635
636         if (kvm_is_ucontrol(kvm))
637                 return -EINVAL;
638
639         mutex_lock(&kvm->slots_lock);
640
641         r = -EINVAL;
642         if (log->slot >= KVM_USER_MEM_SLOTS)
643                 goto out;
644
645         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
646         if (r)
647                 goto out;
648
649         /* Clear the dirty log */
650         if (is_dirty) {
651                 n = kvm_dirty_bitmap_bytes(memslot);
652                 memset(memslot->dirty_bitmap, 0, n);
653         }
654         r = 0;
655 out:
656         mutex_unlock(&kvm->slots_lock);
657         return r;
658 }
659
660 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
661 {
662         unsigned int i;
663         struct kvm_vcpu *vcpu;
664
665         kvm_for_each_vcpu(i, vcpu, kvm) {
666                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
667         }
668 }
669
670 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
671 {
672         int r;
673
674         if (cap->flags)
675                 return -EINVAL;
676
677         switch (cap->cap) {
678         case KVM_CAP_S390_IRQCHIP:
679                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
680                 kvm->arch.use_irqchip = 1;
681                 r = 0;
682                 break;
683         case KVM_CAP_S390_USER_SIGP:
684                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
685                 kvm->arch.user_sigp = 1;
686                 r = 0;
687                 break;
688         case KVM_CAP_S390_VECTOR_REGISTERS:
689                 mutex_lock(&kvm->lock);
690                 if (kvm->created_vcpus) {
691                         r = -EBUSY;
692                 } else if (MACHINE_HAS_VX) {
693                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
694                         set_kvm_facility(kvm->arch.model.fac_list, 129);
695                         if (test_facility(134)) {
696                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
697                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
698                         }
699                         if (test_facility(135)) {
700                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
701                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
702                         }
703                         if (test_facility(148)) {
704                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
705                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
706                         }
707                         if (test_facility(152)) {
708                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
709                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
710                         }
711                         r = 0;
712                 } else
713                         r = -EINVAL;
714                 mutex_unlock(&kvm->lock);
715                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
716                          r ? "(not available)" : "(success)");
717                 break;
718         case KVM_CAP_S390_RI:
719                 r = -EINVAL;
720                 mutex_lock(&kvm->lock);
721                 if (kvm->created_vcpus) {
722                         r = -EBUSY;
723                 } else if (test_facility(64)) {
724                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
725                         set_kvm_facility(kvm->arch.model.fac_list, 64);
726                         r = 0;
727                 }
728                 mutex_unlock(&kvm->lock);
729                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
730                          r ? "(not available)" : "(success)");
731                 break;
732         case KVM_CAP_S390_AIS:
733                 mutex_lock(&kvm->lock);
734                 if (kvm->created_vcpus) {
735                         r = -EBUSY;
736                 } else {
737                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
738                         set_kvm_facility(kvm->arch.model.fac_list, 72);
739                         r = 0;
740                 }
741                 mutex_unlock(&kvm->lock);
742                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
743                          r ? "(not available)" : "(success)");
744                 break;
745         case KVM_CAP_S390_GS:
746                 r = -EINVAL;
747                 mutex_lock(&kvm->lock);
748                 if (kvm->created_vcpus) {
749                         r = -EBUSY;
750                 } else if (test_facility(133)) {
751                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
752                         set_kvm_facility(kvm->arch.model.fac_list, 133);
753                         r = 0;
754                 }
755                 mutex_unlock(&kvm->lock);
756                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
757                          r ? "(not available)" : "(success)");
758                 break;
759         case KVM_CAP_S390_HPAGE_1M:
760                 mutex_lock(&kvm->lock);
761                 if (kvm->created_vcpus)
762                         r = -EBUSY;
763                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
764                         r = -EINVAL;
765                 else {
766                         r = 0;
767                         down_write(&kvm->mm->mmap_sem);
768                         kvm->mm->context.allow_gmap_hpage_1m = 1;
769                         up_write(&kvm->mm->mmap_sem);
770                         /*
771                          * We might have to create fake 4k page
772                          * tables. To keep the hardware from working on
773                          * stale PGSTEs, we emulate these instructions.
774                          */
775                         kvm->arch.use_skf = 0;
776                         kvm->arch.use_pfmfi = 0;
777                 }
778                 mutex_unlock(&kvm->lock);
779                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
780                          r ? "(not available)" : "(success)");
781                 break;
782         case KVM_CAP_S390_USER_STSI:
783                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
784                 kvm->arch.user_stsi = 1;
785                 r = 0;
786                 break;
787         case KVM_CAP_S390_USER_INSTR0:
788                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
789                 kvm->arch.user_instr0 = 1;
790                 icpt_operexc_on_all_vcpus(kvm);
791                 r = 0;
792                 break;
793         default:
794                 r = -EINVAL;
795                 break;
796         }
797         return r;
798 }
799
800 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
801 {
802         int ret;
803
804         switch (attr->attr) {
805         case KVM_S390_VM_MEM_LIMIT_SIZE:
806                 ret = 0;
807                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
808                          kvm->arch.mem_limit);
809                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
810                         ret = -EFAULT;
811                 break;
812         default:
813                 ret = -ENXIO;
814                 break;
815         }
816         return ret;
817 }
818
819 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
820 {
821         int ret;
822         unsigned int idx;
823         switch (attr->attr) {
824         case KVM_S390_VM_MEM_ENABLE_CMMA:
825                 ret = -ENXIO;
826                 if (!sclp.has_cmma)
827                         break;
828
829                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
830                 mutex_lock(&kvm->lock);
831                 if (kvm->created_vcpus)
832                         ret = -EBUSY;
833                 else if (kvm->mm->context.allow_gmap_hpage_1m)
834                         ret = -EINVAL;
835                 else {
836                         kvm->arch.use_cmma = 1;
837                         /* PFMF interpretation is not compatible with CMMA. */
838                         kvm->arch.use_pfmfi = 0;
839                         ret = 0;
840                 }
841                 mutex_unlock(&kvm->lock);
842                 break;
843         case KVM_S390_VM_MEM_CLR_CMMA:
844                 ret = -ENXIO;
845                 if (!sclp.has_cmma)
846                         break;
847                 ret = -EINVAL;
848                 if (!kvm->arch.use_cmma)
849                         break;
850
851                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
852                 mutex_lock(&kvm->lock);
853                 idx = srcu_read_lock(&kvm->srcu);
854                 s390_reset_cmma(kvm->arch.gmap->mm);
855                 srcu_read_unlock(&kvm->srcu, idx);
856                 mutex_unlock(&kvm->lock);
857                 ret = 0;
858                 break;
859         case KVM_S390_VM_MEM_LIMIT_SIZE: {
860                 unsigned long new_limit;
861
862                 if (kvm_is_ucontrol(kvm))
863                         return -EINVAL;
864
865                 if (get_user(new_limit, (u64 __user *)attr->addr))
866                         return -EFAULT;
867
868                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
869                     new_limit > kvm->arch.mem_limit)
870                         return -E2BIG;
871
872                 if (!new_limit)
873                         return -EINVAL;
874
875                 /* gmap_create takes last usable address */
876                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
877                         new_limit -= 1;
878
879                 ret = -EBUSY;
880                 mutex_lock(&kvm->lock);
881                 if (!kvm->created_vcpus) {
882                         /* gmap_create will round the limit up */
883                         struct gmap *new = gmap_create(current->mm, new_limit);
884
885                         if (!new) {
886                                 ret = -ENOMEM;
887                         } else {
888                                 gmap_remove(kvm->arch.gmap);
889                                 new->private = kvm;
890                                 kvm->arch.gmap = new;
891                                 ret = 0;
892                         }
893                 }
894                 mutex_unlock(&kvm->lock);
895                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
896                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
897                          (void *) kvm->arch.gmap->asce);
898                 break;
899         }
900         default:
901                 ret = -ENXIO;
902                 break;
903         }
904         return ret;
905 }
906
907 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
908
909 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
910 {
911         struct kvm_vcpu *vcpu;
912         int i;
913
914         kvm_s390_vcpu_block_all(kvm);
915
916         kvm_for_each_vcpu(i, vcpu, kvm) {
917                 kvm_s390_vcpu_crypto_setup(vcpu);
918                 /* recreate the shadow crycb by leaving the VSIE handler */
919                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
920         }
921
922         kvm_s390_vcpu_unblock_all(kvm);
923 }
924
925 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
926 {
927         mutex_lock(&kvm->lock);
928         switch (attr->attr) {
929         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
930                 if (!test_kvm_facility(kvm, 76)) {
931                         mutex_unlock(&kvm->lock);
932                         return -EINVAL;
933                 }
934                 get_random_bytes(
935                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
936                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
937                 kvm->arch.crypto.aes_kw = 1;
938                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
939                 break;
940         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
941                 if (!test_kvm_facility(kvm, 76)) {
942                         mutex_unlock(&kvm->lock);
943                         return -EINVAL;
944                 }
945                 get_random_bytes(
946                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
947                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
948                 kvm->arch.crypto.dea_kw = 1;
949                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
950                 break;
951         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
952                 if (!test_kvm_facility(kvm, 76)) {
953                         mutex_unlock(&kvm->lock);
954                         return -EINVAL;
955                 }
956                 kvm->arch.crypto.aes_kw = 0;
957                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
958                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
959                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
960                 break;
961         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
962                 if (!test_kvm_facility(kvm, 76)) {
963                         mutex_unlock(&kvm->lock);
964                         return -EINVAL;
965                 }
966                 kvm->arch.crypto.dea_kw = 0;
967                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
968                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
969                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
970                 break;
971         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
972                 if (!ap_instructions_available()) {
973                         mutex_unlock(&kvm->lock);
974                         return -EOPNOTSUPP;
975                 }
976                 kvm->arch.crypto.apie = 1;
977                 break;
978         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
979                 if (!ap_instructions_available()) {
980                         mutex_unlock(&kvm->lock);
981                         return -EOPNOTSUPP;
982                 }
983                 kvm->arch.crypto.apie = 0;
984                 break;
985         default:
986                 mutex_unlock(&kvm->lock);
987                 return -ENXIO;
988         }
989
990         kvm_s390_vcpu_crypto_reset_all(kvm);
991         mutex_unlock(&kvm->lock);
992         return 0;
993 }
994
995 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
996 {
997         int cx;
998         struct kvm_vcpu *vcpu;
999
1000         kvm_for_each_vcpu(cx, vcpu, kvm)
1001                 kvm_s390_sync_request(req, vcpu);
1002 }
1003
1004 /*
1005  * Must be called with kvm->srcu held to avoid races on memslots, and with
1006  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1007  */
1008 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1009 {
1010         struct kvm_memory_slot *ms;
1011         struct kvm_memslots *slots;
1012         unsigned long ram_pages = 0;
1013         int slotnr;
1014
1015         /* migration mode already enabled */
1016         if (kvm->arch.migration_mode)
1017                 return 0;
1018         slots = kvm_memslots(kvm);
1019         if (!slots || !slots->used_slots)
1020                 return -EINVAL;
1021
1022         if (!kvm->arch.use_cmma) {
1023                 kvm->arch.migration_mode = 1;
1024                 return 0;
1025         }
1026         /* mark all the pages in active slots as dirty */
1027         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1028                 ms = slots->memslots + slotnr;
1029                 if (!ms->dirty_bitmap)
1030                         return -EINVAL;
1031                 /*
1032                  * The second half of the bitmap is only used on x86,
1033                  * and would be wasted otherwise, so we put it to good
1034                  * use here to keep track of the state of the storage
1035                  * attributes.
1036                  */
1037                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1038                 ram_pages += ms->npages;
1039         }
1040         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1041         kvm->arch.migration_mode = 1;
1042         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1043         return 0;
1044 }
1045
1046 /*
1047  * Must be called with kvm->slots_lock to avoid races with ourselves and
1048  * kvm_s390_vm_start_migration.
1049  */
1050 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1051 {
1052         /* migration mode already disabled */
1053         if (!kvm->arch.migration_mode)
1054                 return 0;
1055         kvm->arch.migration_mode = 0;
1056         if (kvm->arch.use_cmma)
1057                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1058         return 0;
1059 }
1060
1061 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1062                                      struct kvm_device_attr *attr)
1063 {
1064         int res = -ENXIO;
1065
1066         mutex_lock(&kvm->slots_lock);
1067         switch (attr->attr) {
1068         case KVM_S390_VM_MIGRATION_START:
1069                 res = kvm_s390_vm_start_migration(kvm);
1070                 break;
1071         case KVM_S390_VM_MIGRATION_STOP:
1072                 res = kvm_s390_vm_stop_migration(kvm);
1073                 break;
1074         default:
1075                 break;
1076         }
1077         mutex_unlock(&kvm->slots_lock);
1078
1079         return res;
1080 }
1081
1082 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1083                                      struct kvm_device_attr *attr)
1084 {
1085         u64 mig = kvm->arch.migration_mode;
1086
1087         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1088                 return -ENXIO;
1089
1090         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1091                 return -EFAULT;
1092         return 0;
1093 }
1094
1095 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1096 {
1097         struct kvm_s390_vm_tod_clock gtod;
1098
1099         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1100                 return -EFAULT;
1101
1102         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1103                 return -EINVAL;
1104         kvm_s390_set_tod_clock(kvm, &gtod);
1105
1106         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1107                 gtod.epoch_idx, gtod.tod);
1108
1109         return 0;
1110 }
1111
1112 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1113 {
1114         u8 gtod_high;
1115
1116         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1117                                            sizeof(gtod_high)))
1118                 return -EFAULT;
1119
1120         if (gtod_high != 0)
1121                 return -EINVAL;
1122         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1123
1124         return 0;
1125 }
1126
1127 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1128 {
1129         struct kvm_s390_vm_tod_clock gtod = { 0 };
1130
1131         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1132                            sizeof(gtod.tod)))
1133                 return -EFAULT;
1134
1135         kvm_s390_set_tod_clock(kvm, &gtod);
1136         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1137         return 0;
1138 }
1139
1140 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1141 {
1142         int ret;
1143
1144         if (attr->flags)
1145                 return -EINVAL;
1146
1147         switch (attr->attr) {
1148         case KVM_S390_VM_TOD_EXT:
1149                 ret = kvm_s390_set_tod_ext(kvm, attr);
1150                 break;
1151         case KVM_S390_VM_TOD_HIGH:
1152                 ret = kvm_s390_set_tod_high(kvm, attr);
1153                 break;
1154         case KVM_S390_VM_TOD_LOW:
1155                 ret = kvm_s390_set_tod_low(kvm, attr);
1156                 break;
1157         default:
1158                 ret = -ENXIO;
1159                 break;
1160         }
1161         return ret;
1162 }
1163
1164 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1165                                    struct kvm_s390_vm_tod_clock *gtod)
1166 {
1167         struct kvm_s390_tod_clock_ext htod;
1168
1169         preempt_disable();
1170
1171         get_tod_clock_ext((char *)&htod);
1172
1173         gtod->tod = htod.tod + kvm->arch.epoch;
1174         gtod->epoch_idx = 0;
1175         if (test_kvm_facility(kvm, 139)) {
1176                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
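                /* carry into the epoch index if adding the epoch wrapped the TOD value */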
1177                 if (gtod->tod < htod.tod)
1178                         gtod->epoch_idx += 1;
1179         }
1180
1181         preempt_enable();
1182 }
1183
1184 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1185 {
1186         struct kvm_s390_vm_tod_clock gtod;
1187
1188         memset(&gtod, 0, sizeof(gtod));
1189         kvm_s390_get_tod_clock(kvm, &gtod);
1190         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1191                 return -EFAULT;
1192
1193         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1194                 gtod.epoch_idx, gtod.tod);
1195         return 0;
1196 }
1197
1198 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1199 {
1200         u8 gtod_high = 0;
1201
1202         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1203                                          sizeof(gtod_high)))
1204                 return -EFAULT;
1205         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1206
1207         return 0;
1208 }
1209
1210 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212         u64 gtod;
1213
1214         gtod = kvm_s390_get_tod_clock_fast(kvm);
1215         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1216                 return -EFAULT;
1217         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1218
1219         return 0;
1220 }
1221
1222 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1223 {
1224         int ret;
1225
1226         if (attr->flags)
1227                 return -EINVAL;
1228
1229         switch (attr->attr) {
1230         case KVM_S390_VM_TOD_EXT:
1231                 ret = kvm_s390_get_tod_ext(kvm, attr);
1232                 break;
1233         case KVM_S390_VM_TOD_HIGH:
1234                 ret = kvm_s390_get_tod_high(kvm, attr);
1235                 break;
1236         case KVM_S390_VM_TOD_LOW:
1237                 ret = kvm_s390_get_tod_low(kvm, attr);
1238                 break;
1239         default:
1240                 ret = -ENXIO;
1241                 break;
1242         }
1243         return ret;
1244 }
1245
1246 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1247 {
1248         struct kvm_s390_vm_cpu_processor *proc;
1249         u16 lowest_ibc, unblocked_ibc;
1250         int ret = 0;
1251
1252         mutex_lock(&kvm->lock);
1253         if (kvm->created_vcpus) {
1254                 ret = -EBUSY;
1255                 goto out;
1256         }
1257         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1258         if (!proc) {
1259                 ret = -ENOMEM;
1260                 goto out;
1261         }
1262         if (!copy_from_user(proc, (void __user *)attr->addr,
1263                             sizeof(*proc))) {
1264                 kvm->arch.model.cpuid = proc->cpuid;
1265                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1266                 unblocked_ibc = sclp.ibc & 0xfff;
1267                 if (lowest_ibc && proc->ibc) {
1268                         if (proc->ibc > unblocked_ibc)
1269                                 kvm->arch.model.ibc = unblocked_ibc;
1270                         else if (proc->ibc < lowest_ibc)
1271                                 kvm->arch.model.ibc = lowest_ibc;
1272                         else
1273                                 kvm->arch.model.ibc = proc->ibc;
1274                 }
1275                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1276                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1277                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1278                          kvm->arch.model.ibc,
1279                          kvm->arch.model.cpuid);
1280                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1281                          kvm->arch.model.fac_list[0],
1282                          kvm->arch.model.fac_list[1],
1283                          kvm->arch.model.fac_list[2]);
1284         } else
1285                 ret = -EFAULT;
1286         kfree(proc);
1287 out:
1288         mutex_unlock(&kvm->lock);
1289         return ret;
1290 }
1291
1292 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1293                                        struct kvm_device_attr *attr)
1294 {
1295         struct kvm_s390_vm_cpu_feat data;
1296
1297         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1298                 return -EFAULT;
1299         if (!bitmap_subset((unsigned long *) data.feat,
1300                            kvm_s390_available_cpu_feat,
1301                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1302                 return -EINVAL;
1303
1304         mutex_lock(&kvm->lock);
1305         if (kvm->created_vcpus) {
1306                 mutex_unlock(&kvm->lock);
1307                 return -EBUSY;
1308         }
1309         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1310                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1311         mutex_unlock(&kvm->lock);
1312         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1313                          data.feat[0],
1314                          data.feat[1],
1315                          data.feat[2]);
1316         return 0;
1317 }
1318
1319 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1320                                           struct kvm_device_attr *attr)
1321 {
1322         mutex_lock(&kvm->lock);
1323         if (kvm->created_vcpus) {
1324                 mutex_unlock(&kvm->lock);
1325                 return -EBUSY;
1326         }
1327
1328         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1329                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1330                 mutex_unlock(&kvm->lock);
1331                 return -EFAULT;
1332         }
1333         mutex_unlock(&kvm->lock);
1334
1335         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1338                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1339                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1340         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1343         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1345                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1346         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1348                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1349         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1352         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1355         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1358         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1361         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1364         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1367         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1370         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1373         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1376         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1379         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1381                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1382         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1384                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1387         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1392
1393         return 0;
1394 }
1395
1396 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1397 {
1398         int ret = -ENXIO;
1399
1400         switch (attr->attr) {
1401         case KVM_S390_VM_CPU_PROCESSOR:
1402                 ret = kvm_s390_set_processor(kvm, attr);
1403                 break;
1404         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1405                 ret = kvm_s390_set_processor_feat(kvm, attr);
1406                 break;
1407         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1408                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1409                 break;
1410         }
1411         return ret;
1412 }
1413
1414 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1415 {
1416         struct kvm_s390_vm_cpu_processor *proc;
1417         int ret = 0;
1418
1419         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1420         if (!proc) {
1421                 ret = -ENOMEM;
1422                 goto out;
1423         }
1424         proc->cpuid = kvm->arch.model.cpuid;
1425         proc->ibc = kvm->arch.model.ibc;
1426         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1427                S390_ARCH_FAC_LIST_SIZE_BYTE);
1428         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1429                  kvm->arch.model.ibc,
1430                  kvm->arch.model.cpuid);
1431         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1432                  kvm->arch.model.fac_list[0],
1433                  kvm->arch.model.fac_list[1],
1434                  kvm->arch.model.fac_list[2]);
1435         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1436                 ret = -EFAULT;
1437         kfree(proc);
1438 out:
1439         return ret;
1440 }
1441
1442 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1443 {
1444         struct kvm_s390_vm_cpu_machine *mach;
1445         int ret = 0;
1446
1447         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1448         if (!mach) {
1449                 ret = -ENOMEM;
1450                 goto out;
1451         }
1452         get_cpu_id((struct cpuid *) &mach->cpuid);
1453         mach->ibc = sclp.ibc;
1454         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1455                S390_ARCH_FAC_LIST_SIZE_BYTE);
1456         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1457                sizeof(S390_lowcore.stfle_fac_list));
1458         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1459                  kvm->arch.model.ibc,
1460                  kvm->arch.model.cpuid);
1461         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1462                  mach->fac_mask[0],
1463                  mach->fac_mask[1],
1464                  mach->fac_mask[2]);
1465         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1466                  mach->fac_list[0],
1467                  mach->fac_list[1],
1468                  mach->fac_list[2]);
1469         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1470                 ret = -EFAULT;
1471         kfree(mach);
1472 out:
1473         return ret;
1474 }
1475
1476 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1477                                        struct kvm_device_attr *attr)
1478 {
1479         struct kvm_s390_vm_cpu_feat data;
1480
1481         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1482                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1483         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1484                 return -EFAULT;
1485         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1486                          data.feat[0],
1487                          data.feat[1],
1488                          data.feat[2]);
1489         return 0;
1490 }
1491
1492 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1493                                      struct kvm_device_attr *attr)
1494 {
1495         struct kvm_s390_vm_cpu_feat data;
1496
1497         bitmap_copy((unsigned long *) data.feat,
1498                     kvm_s390_available_cpu_feat,
1499                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1500         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1501                 return -EFAULT;
1502         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1503                          data.feat[0],
1504                          data.feat[1],
1505                          data.feat[2]);
1506         return 0;
1507 }
1508
1509 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1510                                           struct kvm_device_attr *attr)
1511 {
1512         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1513             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1514                 return -EFAULT;
1515
1516         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1521         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1524         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1527         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1530         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1533         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1536         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1539         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1542         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1545         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1548         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1551         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1554         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1557         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1560         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1563         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1568         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1573
1574         return 0;
1575 }
1576
1577 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1578                                         struct kvm_device_attr *attr)
1579 {
1580         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1581             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1582                 return -EFAULT;
1583
1584         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1585                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1587                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1588                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1589         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1590                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1591                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1592         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1593                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1594                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1595         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1596                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1597                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1598         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1599                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1600                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1601         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1604         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1605                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1606                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1607         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1608                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1609                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1610         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1611                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1612                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1613         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1614                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1616         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1617                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1619         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1620                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1621                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1622         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1623                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1625         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1628         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1630                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1631         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1632                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1633                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1634                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1635                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1636         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1637                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1639                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1640                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1641
1642         return 0;
1643 }
1644
1645 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1646 {
1647         int ret = -ENXIO;
1648
1649         switch (attr->attr) {
1650         case KVM_S390_VM_CPU_PROCESSOR:
1651                 ret = kvm_s390_get_processor(kvm, attr);
1652                 break;
1653         case KVM_S390_VM_CPU_MACHINE:
1654                 ret = kvm_s390_get_machine(kvm, attr);
1655                 break;
1656         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1657                 ret = kvm_s390_get_processor_feat(kvm, attr);
1658                 break;
1659         case KVM_S390_VM_CPU_MACHINE_FEAT:
1660                 ret = kvm_s390_get_machine_feat(kvm, attr);
1661                 break;
1662         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1663                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1664                 break;
1665         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1666                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1667                 break;
1668         }
1669         return ret;
1670 }
1671
1672 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1673 {
1674         int ret;
1675
1676         switch (attr->group) {
1677         case KVM_S390_VM_MEM_CTRL:
1678                 ret = kvm_s390_set_mem_control(kvm, attr);
1679                 break;
1680         case KVM_S390_VM_TOD:
1681                 ret = kvm_s390_set_tod(kvm, attr);
1682                 break;
1683         case KVM_S390_VM_CPU_MODEL:
1684                 ret = kvm_s390_set_cpu_model(kvm, attr);
1685                 break;
1686         case KVM_S390_VM_CRYPTO:
1687                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1688                 break;
1689         case KVM_S390_VM_MIGRATION:
1690                 ret = kvm_s390_vm_set_migration(kvm, attr);
1691                 break;
1692         default:
1693                 ret = -ENXIO;
1694                 break;
1695         }
1696
1697         return ret;
1698 }
1699
1700 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1701 {
1702         int ret;
1703
1704         switch (attr->group) {
1705         case KVM_S390_VM_MEM_CTRL:
1706                 ret = kvm_s390_get_mem_control(kvm, attr);
1707                 break;
1708         case KVM_S390_VM_TOD:
1709                 ret = kvm_s390_get_tod(kvm, attr);
1710                 break;
1711         case KVM_S390_VM_CPU_MODEL:
1712                 ret = kvm_s390_get_cpu_model(kvm, attr);
1713                 break;
1714         case KVM_S390_VM_MIGRATION:
1715                 ret = kvm_s390_vm_get_migration(kvm, attr);
1716                 break;
1717         default:
1718                 ret = -ENXIO;
1719                 break;
1720         }
1721
1722         return ret;
1723 }
1724
1725 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1726 {
1727         int ret;
1728
1729         switch (attr->group) {
1730         case KVM_S390_VM_MEM_CTRL:
1731                 switch (attr->attr) {
1732                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1733                 case KVM_S390_VM_MEM_CLR_CMMA:
1734                         ret = sclp.has_cmma ? 0 : -ENXIO;
1735                         break;
1736                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1737                         ret = 0;
1738                         break;
1739                 default:
1740                         ret = -ENXIO;
1741                         break;
1742                 }
1743                 break;
1744         case KVM_S390_VM_TOD:
1745                 switch (attr->attr) {
1746                 case KVM_S390_VM_TOD_LOW:
1747                 case KVM_S390_VM_TOD_HIGH:
1748                         ret = 0;
1749                         break;
1750                 default:
1751                         ret = -ENXIO;
1752                         break;
1753                 }
1754                 break;
1755         case KVM_S390_VM_CPU_MODEL:
1756                 switch (attr->attr) {
1757                 case KVM_S390_VM_CPU_PROCESSOR:
1758                 case KVM_S390_VM_CPU_MACHINE:
1759                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1760                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1761                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1762                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1763                         ret = 0;
1764                         break;
1765                 default:
1766                         ret = -ENXIO;
1767                         break;
1768                 }
1769                 break;
1770         case KVM_S390_VM_CRYPTO:
1771                 switch (attr->attr) {
1772                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1773                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1774                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1775                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1776                         ret = 0;
1777                         break;
1778                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1779                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1780                         ret = ap_instructions_available() ? 0 : -ENXIO;
1781                         break;
1782                 default:
1783                         ret = -ENXIO;
1784                         break;
1785                 }
1786                 break;
1787         case KVM_S390_VM_MIGRATION:
1788                 ret = 0;
1789                 break;
1790         default:
1791                 ret = -ENXIO;
1792                 break;
1793         }
1794
1795         return ret;
1796 }
1797
1798 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1799 {
1800         uint8_t *keys;
1801         uint64_t hva;
1802         int srcu_idx, i, r = 0;
1803
1804         if (args->flags != 0)
1805                 return -EINVAL;
1806
1807         /* Is this guest using storage keys? */
1808         if (!mm_uses_skeys(current->mm))
1809                 return KVM_S390_GET_SKEYS_NONE;
1810
1811         /* Enforce sane limit on memory allocation */
1812         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1813                 return -EINVAL;
1814
1815         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1816         if (!keys)
1817                 return -ENOMEM;
1818
1819         down_read(&current->mm->mmap_sem);
1820         srcu_idx = srcu_read_lock(&kvm->srcu);
1821         for (i = 0; i < args->count; i++) {
1822                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1823                 if (kvm_is_error_hva(hva)) {
1824                         r = -EFAULT;
1825                         break;
1826                 }
1827
1828                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1829                 if (r)
1830                         break;
1831         }
1832         srcu_read_unlock(&kvm->srcu, srcu_idx);
1833         up_read(&current->mm->mmap_sem);
1834
1835         if (!r) {
1836                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1837                                  sizeof(uint8_t) * args->count);
1838                 if (r)
1839                         r = -EFAULT;
1840         }
1841
1842         kvfree(keys);
1843         return r;
1844 }
1845
1846 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1847 {
1848         uint8_t *keys;
1849         uint64_t hva;
1850         int srcu_idx, i, r = 0;
1851         bool unlocked;
1852
1853         if (args->flags != 0)
1854                 return -EINVAL;
1855
1856         /* Enforce sane limit on memory allocation */
1857         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1858                 return -EINVAL;
1859
1860         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1861         if (!keys)
1862                 return -ENOMEM;
1863
1864         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1865                            sizeof(uint8_t) * args->count);
1866         if (r) {
1867                 r = -EFAULT;
1868                 goto out;
1869         }
1870
1871         /* Enable storage key handling for the guest */
1872         r = s390_enable_skey();
1873         if (r)
1874                 goto out;
1875
1876         i = 0;
1877         down_read(&current->mm->mmap_sem);
1878         srcu_idx = srcu_read_lock(&kvm->srcu);
1879         while (i < args->count) {
1880                 unlocked = false;
1881                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1882                 if (kvm_is_error_hva(hva)) {
1883                         r = -EFAULT;
1884                         break;
1885                 }
1886
1887                 /* Lowest order bit is reserved */
1888                 if (keys[i] & 0x01) {
1889                         r = -EINVAL;
1890                         break;
1891                 }
1892
1893                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1894                 if (r) {
1895                         r = fixup_user_fault(current, current->mm, hva,
1896                                              FAULT_FLAG_WRITE, &unlocked);
1897                         if (r)
1898                                 break;
1899                 }
1900                 if (!r)
1901                         i++;
1902         }
1903         srcu_read_unlock(&kvm->srcu, srcu_idx);
1904         up_read(&current->mm->mmap_sem);
1905 out:
1906         kvfree(keys);
1907         return r;
1908 }
1909
1910 /*
1911  * Base address and length must be sent at the start of each block; it is
1912  * therefore cheaper to send some clean data, as long as it's less than the
1913  * size of two longs.
1914  */
1915 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1916 /* for consistency */
1917 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
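/*
 * For illustration: on s390x sizeof(void *) is 8, so KVM_S390_MAX_BIT_DISTANCE
 * is 16. Up to that many consecutive "clean" values (one byte per page) may
 * still be emitted instead of ending the block, because opening a new block
 * costs a fresh base address and length, i.e. two longs (16 bytes).
 */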
1918
1919 /*
1920  * Similar to gfn_to_memslot, but also returns the index of a memslot when the
1921  * address falls in a hole. In that case the index of one of the memslots
1922  * bordering the hole is returned.
1923  */
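/* Note: the memslots array is sorted by base_gfn in descending order. */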
1924 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1925 {
1926         int start = 0, end = slots->used_slots;
1927         int slot = atomic_read(&slots->lru_slot);
1928         struct kvm_memory_slot *memslots = slots->memslots;
1929
1930         if (gfn >= memslots[slot].base_gfn &&
1931             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1932                 return slot;
1933
1934         while (start < end) {
1935                 slot = start + (end - start) / 2;
1936
1937                 if (gfn >= memslots[slot].base_gfn)
1938                         end = slot;
1939                 else
1940                         start = slot + 1;
1941         }
1942
1943         if (start >= slots->used_slots)
1944                 return slots->used_slots - 1;
1945
1946         if (gfn >= memslots[start].base_gfn &&
1947             gfn < memslots[start].base_gfn + memslots[start].npages) {
1948                 atomic_set(&slots->lru_slot, start);
1949         }
1950
1951         return start;
1952 }
1953
1954 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1955                               u8 *res, unsigned long bufsize)
1956 {
1957         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1958
1959         args->count = 0;
1960         while (args->count < bufsize) {
1961                 hva = gfn_to_hva(kvm, cur_gfn);
1962                 /*
1963                  * We return an error if the first value was invalid, but we
1964                  * return successfully if at least one value was copied.
1965                  */
1966                 if (kvm_is_error_hva(hva))
1967                         return args->count ? 0 : -EFAULT;
1968                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1969                         pgstev = 0;
1970                 res[args->count++] = (pgstev >> 24) & 0x43;
1971                 cur_gfn++;
1972         }
1973
1974         return 0;
1975 }
1976
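/*
 * Return the guest frame number of the next page, starting at cur_gfn, whose
 * CMMA "dirty" bit is set in the second (CMMA) half of the memslot dirty
 * bitmap (see kvm_second_dirty_bitmap()). If no further dirty page exists,
 * the returned gfn lies past the end of memory, which callers use as the
 * stop condition.
 */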
1977 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1978                                               unsigned long cur_gfn)
1979 {
1980         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1981         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1982         unsigned long ofs = cur_gfn - ms->base_gfn;
1983
1984         if (ms->base_gfn + ms->npages <= cur_gfn) {
1985                 slotidx--;
1986                 /* If we are above the highest slot, wrap around */
1987                 if (slotidx < 0)
1988                         slotidx = slots->used_slots - 1;
1989
1990                 ms = slots->memslots + slotidx;
1991                 ofs = 0;
1992         }
1993         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1994         while ((slotidx > 0) && (ofs >= ms->npages)) {
1995                 slotidx--;
1996                 ms = slots->memslots + slotidx;
1997                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1998         }
1999         return ms->base_gfn + ofs;
2000 }
2001
2002 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2003                              u8 *res, unsigned long bufsize)
2004 {
2005         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2006         struct kvm_memslots *slots = kvm_memslots(kvm);
2007         struct kvm_memory_slot *ms;
2008
2009         if (unlikely(!slots->used_slots))
2010                 return 0;
2011
2012         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2013         ms = gfn_to_memslot(kvm, cur_gfn);
2014         args->count = 0;
2015         args->start_gfn = cur_gfn;
2016         if (!ms)
2017                 return 0;
2018         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
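        /* memslot 0 covers the highest guest addresses, so this is the end of memory */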
2019         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2020
2021         while (args->count < bufsize) {
2022                 hva = gfn_to_hva(kvm, cur_gfn);
2023                 if (kvm_is_error_hva(hva))
2024                         return 0;
2025                 /* Decrement only if we actually flipped the bit to 0 */
2026                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2027                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2028                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2029                         pgstev = 0;
2030                 /* Save the value */
2031                 res[args->count++] = (pgstev >> 24) & 0x43;
2032                 /* If the next bit is too far away, stop. */
2033                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2034                         return 0;
2035                 /* If we reached the previous "next", find the next one */
2036                 if (cur_gfn == next_gfn)
2037                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2038                 /* Reached the end of memory or of the buffer, stop */
2039                 if ((next_gfn >= mem_end) ||
2040                     (next_gfn - args->start_gfn >= bufsize))
2041                         return 0;
2042                 cur_gfn++;
2043                 /* Reached the end of the current memslot, take the next one. */
2044                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2045                         ms = gfn_to_memslot(kvm, cur_gfn);
2046                         if (!ms)
2047                                 return 0;
2048                 }
2049         }
2050         return 0;
2051 }
2052
2053 /*
2054  * This function searches for the next page with dirty CMMA attributes, and
2055  * saves the attributes in the buffer until either the end of the buffer is
2056  * reached or a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2057  * no trailing clean bytes are saved.
2058  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2059  * output buffer will indicate 0 as length.
2060  */
2061 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2062                                   struct kvm_s390_cmma_log *args)
2063 {
2064         unsigned long bufsize;
2065         int srcu_idx, peek, ret;
2066         u8 *values;
2067
2068         if (!kvm->arch.use_cmma)
2069                 return -ENXIO;
2070         /* Invalid/unsupported flags were specified */
2071         if (args->flags & ~KVM_S390_CMMA_PEEK)
2072                 return -EINVAL;
2073         /* Migration mode query, and we are not doing a migration */
2074         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2075         if (!peek && !kvm->arch.migration_mode)
2076                 return -EINVAL;
2077         /* CMMA is disabled or was not used, or the buffer has length zero */
2078         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2079         if (!bufsize || !kvm->mm->context.uses_cmm) {
2080                 memset(args, 0, sizeof(*args));
2081                 return 0;
2082         }
2083         /* We are not peeking, and there are no dirty pages */
2084         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2085                 memset(args, 0, sizeof(*args));
2086                 return 0;
2087         }
2088
2089         values = vmalloc(bufsize);
2090         if (!values)
2091                 return -ENOMEM;
2092
2093         down_read(&kvm->mm->mmap_sem);
2094         srcu_idx = srcu_read_lock(&kvm->srcu);
2095         if (peek)
2096                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2097         else
2098                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2099         srcu_read_unlock(&kvm->srcu, srcu_idx);
2100         up_read(&kvm->mm->mmap_sem);
2101
2102         if (kvm->arch.migration_mode)
2103                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2104         else
2105                 args->remaining = 0;
2106
2107         if (copy_to_user((void __user *)args->values, values, args->count))
2108                 ret = -EFAULT;
2109
2110         vfree(values);
2111         return ret;
2112 }
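/*
 * Illustrative sketch only (not part of this file): userspace migration code
 * is expected to call the KVM_S390_GET_CMMA_BITS vm ioctl in a loop, feeding
 * the returned start_gfn/count back in and stopping once "remaining" hits 0.
 * Names such as vm_fd, buf, bufsize and process() are placeholders:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0, .count = bufsize, .values = (__u64)(uintptr_t)buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		process(log.start_gfn, buf, log.count);
 *		log.start_gfn += log.count;
 *		log.count = bufsize;
 *	} while (log.remaining);
 */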
2113
2114 /*
2115  * This function sets the CMMA attributes for the given pages. If the input
2116  * buffer has zero length, no action is taken; otherwise the attributes are
2117  * set and the mm->context.uses_cmm flag is set.
2118  */
2119 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2120                                   const struct kvm_s390_cmma_log *args)
2121 {
2122         unsigned long hva, mask, pgstev, i;
2123         uint8_t *bits;
2124         int srcu_idx, r = 0;
2125
2126         mask = args->mask;
2127
2128         if (!kvm->arch.use_cmma)
2129                 return -ENXIO;
2130         /* invalid/unsupported flags */
2131         if (args->flags != 0)
2132                 return -EINVAL;
2133         /* Enforce sane limit on memory allocation */
2134         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2135                 return -EINVAL;
2136         /* Nothing to do */
2137         if (args->count == 0)
2138                 return 0;
2139
2140         bits = vmalloc(array_size(sizeof(*bits), args->count));
2141         if (!bits)
2142                 return -ENOMEM;
2143
2144         r = copy_from_user(bits, (void __user *)args->values, args->count);
2145         if (r) {
2146                 r = -EFAULT;
2147                 goto out;
2148         }
2149
2150         down_read(&kvm->mm->mmap_sem);
2151         srcu_idx = srcu_read_lock(&kvm->srcu);
2152         for (i = 0; i < args->count; i++) {
2153                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2154                 if (kvm_is_error_hva(hva)) {
2155                         r = -EFAULT;
2156                         break;
2157                 }
2158
2159                 pgstev = bits[i];
2160                 pgstev = pgstev << 24;
2161                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2162                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2163         }
2164         srcu_read_unlock(&kvm->srcu, srcu_idx);
2165         up_read(&kvm->mm->mmap_sem);
2166
2167         if (!kvm->mm->context.uses_cmm) {
2168                 down_write(&kvm->mm->mmap_sem);
2169                 kvm->mm->context.uses_cmm = 1;
2170                 up_write(&kvm->mm->mmap_sem);
2171         }
2172 out:
2173         vfree(bits);
2174         return r;
2175 }
2176
2177 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2178 {
2179         struct kvm_vcpu *vcpu;
2180         u16 rc, rrc;
2181         int ret = 0;
2182         int i;
2183
2184         /*
2185          * We ignore failures and try to destroy as many CPUs as possible.
2186          * At the same time we must not free the assigned resources when
2187          * this fails, as the ultravisor still has access to that memory.
2188          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2189          * behind.
2190          * We want to return the first failure rc and rrc, though.
2191          */
2192         kvm_for_each_vcpu(i, vcpu, kvm) {
2193                 mutex_lock(&vcpu->mutex);
2194                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2195                         *rcp = rc;
2196                         *rrcp = rrc;
2197                         ret = -EIO;
2198                 }
2199                 mutex_unlock(&vcpu->mutex);
2200         }
2201         return ret;
2202 }
2203
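/*
 * Convert all vcpus of this VM to protected mode. If creating one of the
 * protected CPUs fails, the vcpus that were already converted are rolled
 * back via kvm_s390_cpus_from_pv().
 */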
2204 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2205 {
2206         int i, r = 0;
2207         u16 dummy;
2208
2209         struct kvm_vcpu *vcpu;
2210
2211         kvm_for_each_vcpu(i, vcpu, kvm) {
2212                 mutex_lock(&vcpu->mutex);
2213                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2214                 mutex_unlock(&vcpu->mutex);
2215                 if (r)
2216                         break;
2217         }
2218         if (r)
2219                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2220         return r;
2221 }
2222
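/*
 * Handle the KVM_S390_PV_COMMAND ioctl subcommands. The caller holds
 * kvm->lock (see kvm_arch_vm_ioctl() below).
 */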
2223 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 {
2225         int r = 0;
2226         u16 dummy;
2227         void __user *argp = (void __user *)cmd->data;
2228
2229         switch (cmd->cmd) {
2230         case KVM_PV_ENABLE: {
2231                 r = -EINVAL;
2232                 if (kvm_s390_pv_is_protected(kvm))
2233                         break;
2234
2235                 /*
2236                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2237                  *  esca, we need no cleanup in the error cases below
2238                  */
2239                 r = sca_switch_to_extended(kvm);
2240                 if (r)
2241                         break;
2242
2243                 down_write(&current->mm->mmap_sem);
2244                 r = gmap_mark_unmergeable();
2245                 up_write(&current->mm->mmap_sem);
2246                 if (r)
2247                         break;
2248
2249                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250                 if (r)
2251                         break;
2252
2253                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2254                 if (r)
2255                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2256
2257                 /* we need to block service interrupts from now on */
2258                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2259                 break;
2260         }
2261         case KVM_PV_DISABLE: {
2262                 r = -EINVAL;
2263                 if (!kvm_s390_pv_is_protected(kvm))
2264                         break;
2265
2266                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2267                 /*
2268          * If a CPU could not be destroyed, destroying the VM will also fail.
2269                  * There is no point in trying to destroy it. Instead return
2270                  * the rc and rrc from the first CPU that failed destroying.
2271                  */
2272                 if (r)
2273                         break;
2274                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2275
2276                 /* no need to block service interrupts any more */
2277                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278                 break;
2279         }
2280         case KVM_PV_SET_SEC_PARMS: {
2281                 struct kvm_s390_pv_sec_parm parms = {};
2282                 void *hdr;
2283
2284                 r = -EINVAL;
2285                 if (!kvm_s390_pv_is_protected(kvm))
2286                         break;
2287
2288                 r = -EFAULT;
2289                 if (copy_from_user(&parms, argp, sizeof(parms)))
2290                         break;
2291
2292                 /* Currently restricted to 8KB */
2293                 r = -EINVAL;
2294                 if (parms.length > PAGE_SIZE * 2)
2295                         break;
2296
2297                 r = -ENOMEM;
2298                 hdr = vmalloc(parms.length);
2299                 if (!hdr)
2300                         break;
2301
2302                 r = -EFAULT;
2303                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2304                                     parms.length))
2305                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2306                                                       &cmd->rc, &cmd->rrc);
2307
2308                 vfree(hdr);
2309                 break;
2310         }
2311         case KVM_PV_UNPACK: {
2312                 struct kvm_s390_pv_unp unp = {};
2313
2314                 r = -EINVAL;
2315                 if (!kvm_s390_pv_is_protected(kvm))
2316                         break;
2317
2318                 r = -EFAULT;
2319                 if (copy_from_user(&unp, argp, sizeof(unp)))
2320                         break;
2321
2322                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2323                                        &cmd->rc, &cmd->rrc);
2324                 break;
2325         }
2326         case KVM_PV_VERIFY: {
2327                 r = -EINVAL;
2328                 if (!kvm_s390_pv_is_protected(kvm))
2329                         break;
2330
2331                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2332                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2333                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2334                              cmd->rrc);
2335                 break;
2336         }
2337         case KVM_PV_PREP_RESET: {
2338                 r = -EINVAL;
2339                 if (!kvm_s390_pv_is_protected(kvm))
2340                         break;
2341
2342                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2343                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2344                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2345                              cmd->rc, cmd->rrc);
2346                 break;
2347         }
2348         case KVM_PV_UNSHARE_ALL: {
2349                 r = -EINVAL;
2350                 if (!kvm_s390_pv_is_protected(kvm))
2351                         break;
2352
2353                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2354                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2355                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2356                              cmd->rc, cmd->rrc);
2357                 break;
2358         }
2359         default:
2360                 r = -ENOTTY;
2361         }
2362         return r;
2363 }
2364
2365 long kvm_arch_vm_ioctl(struct file *filp,
2366                        unsigned int ioctl, unsigned long arg)
2367 {
2368         struct kvm *kvm = filp->private_data;
2369         void __user *argp = (void __user *)arg;
2370         struct kvm_device_attr attr;
2371         int r;
2372
2373         switch (ioctl) {
2374         case KVM_S390_INTERRUPT: {
2375                 struct kvm_s390_interrupt s390int;
2376
2377                 r = -EFAULT;
2378                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2379                         break;
2380                 r = kvm_s390_inject_vm(kvm, &s390int);
2381                 break;
2382         }
2383         case KVM_CREATE_IRQCHIP: {
2384                 struct kvm_irq_routing_entry routing;
2385
2386                 r = -EINVAL;
2387                 if (kvm->arch.use_irqchip) {
2388                         /* Set up dummy routing. */
2389                         memset(&routing, 0, sizeof(routing));
2390                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2391                 }
2392                 break;
2393         }
2394         case KVM_SET_DEVICE_ATTR: {
2395                 r = -EFAULT;
2396                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2397                         break;
2398                 r = kvm_s390_vm_set_attr(kvm, &attr);
2399                 break;
2400         }
2401         case KVM_GET_DEVICE_ATTR: {
2402                 r = -EFAULT;
2403                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2404                         break;
2405                 r = kvm_s390_vm_get_attr(kvm, &attr);
2406                 break;
2407         }
2408         case KVM_HAS_DEVICE_ATTR: {
2409                 r = -EFAULT;
2410                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2411                         break;
2412                 r = kvm_s390_vm_has_attr(kvm, &attr);
2413                 break;
2414         }
2415         case KVM_S390_GET_SKEYS: {
2416                 struct kvm_s390_skeys args;
2417
2418                 r = -EFAULT;
2419                 if (copy_from_user(&args, argp,
2420                                    sizeof(struct kvm_s390_skeys)))
2421                         break;
2422                 r = kvm_s390_get_skeys(kvm, &args);
2423                 break;
2424         }
2425         case KVM_S390_SET_SKEYS: {
2426                 struct kvm_s390_skeys args;
2427
2428                 r = -EFAULT;
2429                 if (copy_from_user(&args, argp,
2430                                    sizeof(struct kvm_s390_skeys)))
2431                         break;
2432                 r = kvm_s390_set_skeys(kvm, &args);
2433                 break;
2434         }
2435         case KVM_S390_GET_CMMA_BITS: {
2436                 struct kvm_s390_cmma_log args;
2437
2438                 r = -EFAULT;
2439                 if (copy_from_user(&args, argp, sizeof(args)))
2440                         break;
2441                 mutex_lock(&kvm->slots_lock);
2442                 r = kvm_s390_get_cmma_bits(kvm, &args);
2443                 mutex_unlock(&kvm->slots_lock);
2444                 if (!r) {
2445                         r = copy_to_user(argp, &args, sizeof(args));
2446                         if (r)
2447                                 r = -EFAULT;
2448                 }
2449                 break;
2450         }
2451         case KVM_S390_SET_CMMA_BITS: {
2452                 struct kvm_s390_cmma_log args;
2453
2454                 r = -EFAULT;
2455                 if (copy_from_user(&args, argp, sizeof(args)))
2456                         break;
2457                 mutex_lock(&kvm->slots_lock);
2458                 r = kvm_s390_set_cmma_bits(kvm, &args);
2459                 mutex_unlock(&kvm->slots_lock);
2460                 break;
2461         }
2462         case KVM_S390_PV_COMMAND: {
2463                 struct kvm_pv_cmd args;
2464
2465                 /* protvirt means user sigp */
2466                 kvm->arch.user_cpu_state_ctrl = 1;
2467                 r = 0;
2468                 if (!is_prot_virt_host()) {
2469                         r = -EINVAL;
2470                         break;
2471                 }
2472                 if (copy_from_user(&args, argp, sizeof(args))) {
2473                         r = -EFAULT;
2474                         break;
2475                 }
2476                 if (args.flags) {
2477                         r = -EINVAL;
2478                         break;
2479                 }
2480                 mutex_lock(&kvm->lock);
2481                 r = kvm_s390_handle_pv(kvm, &args);
2482                 mutex_unlock(&kvm->lock);
2483                 if (copy_to_user(argp, &args, sizeof(args))) {
2484                         r = -EFAULT;
2485                         break;
2486                 }
2487                 break;
2488         }
2489         default:
2490                 r = -ENOTTY;
2491         }
2492
2493         return r;
2494 }
2495
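/*
 * Return nonzero if the AP extended addressing (APXA) facility is installed,
 * according to the AP query configuration information.
 */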
2496 static int kvm_s390_apxa_installed(void)
2497 {
2498         struct ap_config_info info;
2499
2500         if (ap_instructions_available()) {
2501                 if (ap_qci(&info) == 0)
2502                         return info.apxa;
2503         }
2504
2505         return 0;
2506 }
2507
2508 /*
2509  * The format of the crypto control block (CRYCB) is specified in the 3 low
2510  * order bits of the CRYCB designation (CRYCBD) field as follows:
2511  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2512  *           AP extended addressing (APXA) facility are installed.
2513  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2514  * Format 2: Both the APXA and MSAX3 facilities are installed.
2515  */
2516 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2517 {
2518         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2519
2520         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2521         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2522
2523         /* Check whether MSAX3 is installed */
2524         if (!test_kvm_facility(kvm, 76))
2525                 return;
2526
2527         if (kvm_s390_apxa_installed())
2528                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2529         else
2530                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2531 }
2532
2533 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2534                                unsigned long *aqm, unsigned long *adm)
2535 {
2536         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2537
2538         mutex_lock(&kvm->lock);
2539         kvm_s390_vcpu_block_all(kvm);
2540
2541         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2542         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2543                 memcpy(crycb->apcb1.apm, apm, 32);
2544                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2545                          apm[0], apm[1], apm[2], apm[3]);
2546                 memcpy(crycb->apcb1.aqm, aqm, 32);
2547                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2548                          aqm[0], aqm[1], aqm[2], aqm[3]);
2549                 memcpy(crycb->apcb1.adm, adm, 32);
2550                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2551                          adm[0], adm[1], adm[2], adm[3]);
2552                 break;
2553         case CRYCB_FORMAT1:
2554         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2555                 memcpy(crycb->apcb0.apm, apm, 8);
2556                 memcpy(crycb->apcb0.aqm, aqm, 2);
2557                 memcpy(crycb->apcb0.adm, adm, 2);
2558                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2559                          apm[0], *((unsigned short *)aqm),
2560                          *((unsigned short *)adm));
2561                 break;
2562         default:        /* Cannot happen */
2563                 break;
2564         }
2565
2566         /* recreate the shadow crycb for each vcpu */
2567         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2568         kvm_s390_vcpu_unblock_all(kvm);
2569         mutex_unlock(&kvm->lock);
2570 }
2571 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2572
2573 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2574 {
2575         mutex_lock(&kvm->lock);
2576         kvm_s390_vcpu_block_all(kvm);
2577
2578         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2579                sizeof(kvm->arch.crypto.crycb->apcb0));
2580         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2581                sizeof(kvm->arch.crypto.crycb->apcb1));
2582
2583         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2584         /* recreate the shadow crycb for each vcpu */
2585         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2586         kvm_s390_vcpu_unblock_all(kvm);
2587         mutex_unlock(&kvm->lock);
2588 }
2589 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2590
2591 static u64 kvm_s390_get_initial_cpuid(void)
2592 {
2593         struct cpuid cpuid;
2594
2595         get_cpu_id(&cpuid);
2596         cpuid.version = 0xff;
2597         return *((u64 *) &cpuid);
2598 }
2599
2600 static void kvm_s390_crypto_init(struct kvm *kvm)
2601 {
2602         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2603         kvm_s390_set_crycb_format(kvm);
2604
2605         if (!test_kvm_facility(kvm, 76))
2606                 return;
2607
2608         /* Enable AES/DEA protected key functions by default */
2609         kvm->arch.crypto.aes_kw = 1;
2610         kvm->arch.crypto.dea_kw = 1;
2611         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2612                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2613         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2614                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2615 }
2616
2617 static void sca_dispose(struct kvm *kvm)
2618 {
2619         if (kvm->arch.use_esca)
2620                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2621         else
2622                 free_page((unsigned long)(kvm->arch.sca));
2623         kvm->arch.sca = NULL;
2624 }
2625
2626 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2627 {
2628         gfp_t alloc_flags = GFP_KERNEL;
2629         int i, rc;
2630         char debug_name[16];
2631         static unsigned long sca_offset;
2632
2633         rc = -EINVAL;
2634 #ifdef CONFIG_KVM_S390_UCONTROL
2635         if (type & ~KVM_VM_S390_UCONTROL)
2636                 goto out_err;
2637         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2638                 goto out_err;
2639 #else
2640         if (type)
2641                 goto out_err;
2642 #endif
2643
2644         rc = s390_enable_sie();
2645         if (rc)
2646                 goto out_err;
2647
2648         rc = -ENOMEM;
2649
2650         if (!sclp.has_64bscao)
2651                 alloc_flags |= GFP_DMA;
2652         rwlock_init(&kvm->arch.sca_lock);
2653         /* start with basic SCA */
2654         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2655         if (!kvm->arch.sca)
2656                 goto out_err;
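        /*
         * Stagger the start of each new VM's basic SCA within its page in
         * 16-byte steps, wrapping at the page boundary, so that the SCAs of
         * different VMs do not all begin at the same offset.
         */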
2657         mutex_lock(&kvm_lock);
2658         sca_offset += 16;
2659         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2660                 sca_offset = 0;
2661         kvm->arch.sca = (struct bsca_block *)
2662                         ((char *) kvm->arch.sca + sca_offset);
2663         mutex_unlock(&kvm_lock);
2664
2665         sprintf(debug_name, "kvm-%u", current->pid);
2666
2667         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2668         if (!kvm->arch.dbf)
2669                 goto out_err;
2670
2671         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2672         kvm->arch.sie_page2 =
2673              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2674         if (!kvm->arch.sie_page2)
2675                 goto out_err;
2676
2677         kvm->arch.sie_page2->kvm = kvm;
2678         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2679
2680         for (i = 0; i < kvm_s390_fac_size(); i++) {
2681                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2682                                               (kvm_s390_fac_base[i] |
2683                                                kvm_s390_fac_ext[i]);
2684                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2685                                               kvm_s390_fac_base[i];
2686         }
2687         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2688
2689         /* we are always in czam mode - even on pre-z14 machines */
2690         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2691         set_kvm_facility(kvm->arch.model.fac_list, 138);
2692         /* we emulate STHYI in kvm */
2693         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2694         set_kvm_facility(kvm->arch.model.fac_list, 74);
2695         if (MACHINE_HAS_TLB_GUEST) {
2696                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2697                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2698         }
2699
2700         if (css_general_characteristics.aiv && test_facility(65))
2701                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2702
2703         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2704         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2705
2706         kvm_s390_crypto_init(kvm);
2707
2708         mutex_init(&kvm->arch.float_int.ais_lock);
2709         spin_lock_init(&kvm->arch.float_int.lock);
2710         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2711                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2712         init_waitqueue_head(&kvm->arch.ipte_wq);
2713         mutex_init(&kvm->arch.ipte_mutex);
2714
2715         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2716         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2717
2718         if (type & KVM_VM_S390_UCONTROL) {
2719                 kvm->arch.gmap = NULL;
2720                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2721         } else {
2722                 if (sclp.hamax == U64_MAX)
2723                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2724                 else
2725                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2726                                                     sclp.hamax + 1);
2727                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2728                 if (!kvm->arch.gmap)
2729                         goto out_err;
2730                 kvm->arch.gmap->private = kvm;
2731                 kvm->arch.gmap->pfault_enabled = 0;
2732         }
2733
2734         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2735         kvm->arch.use_skf = sclp.has_skey;
2736         spin_lock_init(&kvm->arch.start_stop_lock);
2737         kvm_s390_vsie_init(kvm);
2738         if (use_gisa)
2739                 kvm_s390_gisa_init(kvm);
2740         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2741
2742         return 0;
2743 out_err:
2744         free_page((unsigned long)kvm->arch.sie_page2);
2745         debug_unregister(kvm->arch.dbf);
2746         sca_dispose(kvm);
2747         KVM_EVENT(3, "creation of vm failed: %d", rc);
2748         return rc;
2749 }
2750
2751 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2752 {
2753         u16 rc, rrc;
2754
2755         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2756         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2757         kvm_s390_clear_local_irqs(vcpu);
2758         kvm_clear_async_pf_completion_queue(vcpu);
2759         if (!kvm_is_ucontrol(vcpu->kvm))
2760                 sca_del_vcpu(vcpu);
2761
2762         if (kvm_is_ucontrol(vcpu->kvm))
2763                 gmap_remove(vcpu->arch.gmap);
2764
2765         if (vcpu->kvm->arch.use_cmma)
2766                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2767         /* We cannot hold the vcpu mutex here, we are already dying */
2768         if (kvm_s390_pv_cpu_get_handle(vcpu))
2769                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2770         free_page((unsigned long)(vcpu->arch.sie_block));
2771 }
2772
2773 static void kvm_free_vcpus(struct kvm *kvm)
2774 {
2775         unsigned int i;
2776         struct kvm_vcpu *vcpu;
2777
2778         kvm_for_each_vcpu(i, vcpu, kvm)
2779                 kvm_vcpu_destroy(vcpu);
2780
2781         mutex_lock(&kvm->lock);
2782         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2783                 kvm->vcpus[i] = NULL;
2784
2785         atomic_set(&kvm->online_vcpus, 0);
2786         mutex_unlock(&kvm->lock);
2787 }
2788
2789 void kvm_arch_destroy_vm(struct kvm *kvm)
2790 {
2791         u16 rc, rrc;
2792
2793         kvm_free_vcpus(kvm);
2794         sca_dispose(kvm);
2795         kvm_s390_gisa_destroy(kvm);
2796         /*
2797          * We are already at the end of life and kvm->lock is not taken.
2798          * This is ok as the file descriptor is closed by now and nobody
2799          * can mess with the pv state. To keep lockdep_assert_held from
2800          * complaining, we do not use kvm_s390_pv_is_protected.
2801          */
2802         if (kvm_s390_pv_get_handle(kvm))
2803                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2804         debug_unregister(kvm->arch.dbf);
2805         free_page((unsigned long)kvm->arch.sie_page2);
2806         if (!kvm_is_ucontrol(kvm))
2807                 gmap_remove(kvm->arch.gmap);
2808         kvm_s390_destroy_adapters(kvm);
2809         kvm_s390_clear_float_irqs(kvm);
2810         kvm_s390_vsie_destroy(kvm);
2811         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2812 }
2813
2814 /* Section: vcpu related */
2815 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2816 {
2817         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2818         if (!vcpu->arch.gmap)
2819                 return -ENOMEM;
2820         vcpu->arch.gmap->private = vcpu->kvm;
2821
2822         return 0;
2823 }
2824
2825 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2826 {
2827         if (!kvm_s390_use_sca_entries())
2828                 return;
2829         read_lock(&vcpu->kvm->arch.sca_lock);
2830         if (vcpu->kvm->arch.use_esca) {
2831                 struct esca_block *sca = vcpu->kvm->arch.sca;
2832
2833                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2834                 sca->cpu[vcpu->vcpu_id].sda = 0;
2835         } else {
2836                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2837
2838                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2839                 sca->cpu[vcpu->vcpu_id].sda = 0;
2840         }
2841         read_unlock(&vcpu->kvm->arch.sca_lock);
2842 }
2843
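/*
 * Publish the vcpu's SIE block address in the (E)SCA and flag its entry
 * in the MCN bitmap; if SCA entries are not used, only the SCA origin is
 * set up, since it is still needed for IPTE control.
 */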
2844 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2845 {
2846         if (!kvm_s390_use_sca_entries()) {
2847                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2848
2849                 /* we still need the basic sca for the ipte control */
2850                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2851                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2852                 return;
2853         }
2854         read_lock(&vcpu->kvm->arch.sca_lock);
2855         if (vcpu->kvm->arch.use_esca) {
2856                 struct esca_block *sca = vcpu->kvm->arch.sca;
2857
2858                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2859                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2860                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2861                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2862                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2863         } else {
2864                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2865
2866                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2867                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2868                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2869                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2870         }
2871         read_unlock(&vcpu->kvm->arch.sca_lock);
2872 }
2873
2874 /* Basic SCA to Extended SCA data copy routines */
2875 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2876 {
2877         d->sda = s->sda;
2878         d->sigp_ctrl.c = s->sigp_ctrl.c;
2879         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2880 }
2881
2882 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2883 {
2884         int i;
2885
2886         d->ipte_control = s->ipte_control;
2887         d->mcn[0] = s->mcn;
2888         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2889                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2890 }
2891
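/*
 * Convert the VM from the basic SCA to the extended SCA so that more
 * vcpus can be added. All vcpus are blocked and the SCA origin in every
 * SIE block is rewritten while sca_lock is held for writing.
 */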
2892 static int sca_switch_to_extended(struct kvm *kvm)
2893 {
2894         struct bsca_block *old_sca = kvm->arch.sca;
2895         struct esca_block *new_sca;
2896         struct kvm_vcpu *vcpu;
2897         unsigned int vcpu_idx;
2898         u32 scaol, scaoh;
2899
2900         if (kvm->arch.use_esca)
2901                 return 0;
2902
2903         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2904         if (!new_sca)
2905                 return -ENOMEM;
2906
2907         scaoh = (u32)((u64)(new_sca) >> 32);
2908         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2909
2910         kvm_s390_vcpu_block_all(kvm);
2911         write_lock(&kvm->arch.sca_lock);
2912
2913         sca_copy_b_to_e(new_sca, old_sca);
2914
2915         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2916                 vcpu->arch.sie_block->scaoh = scaoh;
2917                 vcpu->arch.sie_block->scaol = scaol;
2918                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2919         }
2920         kvm->arch.sca = new_sca;
2921         kvm->arch.use_esca = 1;
2922
2923         write_unlock(&kvm->arch.sca_lock);
2924         kvm_s390_vcpu_unblock_all(kvm);
2925
2926         free_page((unsigned long)old_sca);
2927
2928         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2929                  old_sca, kvm->arch.sca);
2930         return 0;
2931 }
2932
2933 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2934 {
2935         int rc;
2936
2937         if (!kvm_s390_use_sca_entries()) {
2938                 if (id < KVM_MAX_VCPUS)
2939                         return true;
2940                 return false;
2941         }
2942         if (id < KVM_S390_BSCA_CPU_SLOTS)
2943                 return true;
2944         if (!sclp.has_esca || !sclp.has_64bscao)
2945                 return false;
2946
2947         mutex_lock(&kvm->lock);
2948         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2949         mutex_unlock(&kvm->lock);
2950
2951         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2952 }
2953
2954 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2955 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2956 {
2957         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2958         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2959         vcpu->arch.cputm_start = get_tod_clock_fast();
2960         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2961 }
2962
2963 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2964 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2965 {
2966         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2967         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2968         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2969         vcpu->arch.cputm_start = 0;
2970         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2971 }
2972
2973 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975 {
2976         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2977         vcpu->arch.cputm_enabled = true;
2978         __start_cpu_timer_accounting(vcpu);
2979 }
2980
2981 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2982 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2983 {
2984         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2985         __stop_cpu_timer_accounting(vcpu);
2986         vcpu->arch.cputm_enabled = false;
2987 }
2988
2989 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2990 {
2991         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2992         __enable_cpu_timer_accounting(vcpu);
2993         preempt_enable();
2994 }
2995
2996 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2997 {
2998         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2999         __disable_cpu_timer_accounting(vcpu);
3000         preempt_enable();
3001 }
3002
3003 /* set the cpu timer - may only be called from the VCPU thread itself */
3004 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3005 {
3006         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3007         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3008         if (vcpu->arch.cputm_enabled)
3009                 vcpu->arch.cputm_start = get_tod_clock_fast();
3010         vcpu->arch.sie_block->cputm = cputm;
3011         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3012         preempt_enable();
3013 }
3014
3015 /* update and get the cpu timer - can also be called from other VCPU threads */
3016 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3017 {
3018         unsigned int seq;
3019         __u64 value;
3020
3021         if (unlikely(!vcpu->arch.cputm_enabled))
3022                 return vcpu->arch.sie_block->cputm;
3023
3024         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3025         do {
3026                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3027                 /*
3028                  * If the writer would ever execute a read in the critical
3029                  * section, e.g. in irq context, we have a deadlock.
3030                  */
3031                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3032                 value = vcpu->arch.sie_block->cputm;
3033                 /* if cputm_start is 0, accounting is being started/stopped */
3034                 if (likely(vcpu->arch.cputm_start))
3035                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3036         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3037         preempt_enable();
3038         return value;
3039 }
3040
3041 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3042 {
3043
3044         gmap_enable(vcpu->arch.enabled_gmap);
3045         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3046         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3047                 __start_cpu_timer_accounting(vcpu);
3048         vcpu->cpu = cpu;
3049 }
3050
3051 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3052 {
3053         vcpu->cpu = -1;
3054         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3055                 __stop_cpu_timer_accounting(vcpu);
3056         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3057         vcpu->arch.enabled_gmap = gmap_get_enabled();
3058         gmap_disable(vcpu->arch.enabled_gmap);
3059
3060 }
3061
3062 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3063 {
3064         mutex_lock(&vcpu->kvm->lock);
3065         preempt_disable();
3066         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3067         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3068         preempt_enable();
3069         mutex_unlock(&vcpu->kvm->lock);
3070         if (!kvm_is_ucontrol(vcpu->kvm)) {
3071                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3072                 sca_add_vcpu(vcpu);
3073         }
3074         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3075                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3076         /* make vcpu_load load the right gmap on the first trigger */
3077         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3078 }
3079
3080 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3081 {
3082         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3083             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3084                 return true;
3085         return false;
3086 }
3087
3088 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3089 {
3090         /* At least one ECC subfunction must be present */
3091         return kvm_has_pckmo_subfunc(kvm, 32) ||
3092                kvm_has_pckmo_subfunc(kvm, 33) ||
3093                kvm_has_pckmo_subfunc(kvm, 34) ||
3094                kvm_has_pckmo_subfunc(kvm, 40) ||
3095                kvm_has_pckmo_subfunc(kvm, 41);
3096
3097 }
3098
3099 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3100 {
3101         /*
3102          * If the AP instructions are not being interpreted and the MSAX3
3103          * facility is not configured for the guest, there is nothing to set up.
3104          */
3105         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3106                 return;
3107
3108         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3109         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3110         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3111         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3112
3113         if (vcpu->kvm->arch.crypto.apie)
3114                 vcpu->arch.sie_block->eca |= ECA_APIE;
3115
3116         /* Set up protected key support */
3117         if (vcpu->kvm->arch.crypto.aes_kw) {
3118                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3119                 /* ecc is also wrapped with AES key */
3120                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3121                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3122         }
3123
3124         if (vcpu->kvm->arch.crypto.dea_kw)
3125                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3126 }
3127
3128 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3129 {
3130         free_page(vcpu->arch.sie_block->cbrlo);
3131         vcpu->arch.sie_block->cbrlo = 0;
3132 }
3133
3134 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3135 {
3136         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3137         if (!vcpu->arch.sie_block->cbrlo)
3138                 return -ENOMEM;
3139         return 0;
3140 }
3141
3142 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3143 {
3144         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3145
3146         vcpu->arch.sie_block->ibc = model->ibc;
3147         if (test_kvm_facility(vcpu->kvm, 7))
3148                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3149 }
3150
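/*
 * One-time SIE block setup for a new vcpu: CPU flags, facility dependent
 * execution controls, CMMA buffer, the clock comparator timer and crypto,
 * plus creation of the matching ultravisor CPU for protected VMs.
 */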
3151 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3152 {
3153         int rc = 0;
3154         u16 uvrc, uvrrc;
3155
3156         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3157                                                     CPUSTAT_SM |
3158                                                     CPUSTAT_STOPPED);
3159
3160         if (test_kvm_facility(vcpu->kvm, 78))
3161                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3162         else if (test_kvm_facility(vcpu->kvm, 8))
3163                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3164
3165         kvm_s390_vcpu_setup_model(vcpu);
3166
3167         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3168         if (MACHINE_HAS_ESOP)
3169                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3170         if (test_kvm_facility(vcpu->kvm, 9))
3171                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3172         if (test_kvm_facility(vcpu->kvm, 73))
3173                 vcpu->arch.sie_block->ecb |= ECB_TE;
3174
3175         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3176                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3177         if (test_kvm_facility(vcpu->kvm, 130))
3178                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3179         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3180         if (sclp.has_cei)
3181                 vcpu->arch.sie_block->eca |= ECA_CEI;
3182         if (sclp.has_ib)
3183                 vcpu->arch.sie_block->eca |= ECA_IB;
3184         if (sclp.has_siif)
3185                 vcpu->arch.sie_block->eca |= ECA_SII;
3186         if (sclp.has_sigpif)
3187                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3188         if (test_kvm_facility(vcpu->kvm, 129)) {
3189                 vcpu->arch.sie_block->eca |= ECA_VX;
3190                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3191         }
3192         if (test_kvm_facility(vcpu->kvm, 139))
3193                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3194         if (test_kvm_facility(vcpu->kvm, 156))
3195                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3196         if (vcpu->arch.sie_block->gd) {
3197                 vcpu->arch.sie_block->eca |= ECA_AIV;
3198                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3199                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3200         }
3201         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3202                                         | SDNXC;
3203         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3204
3205         if (sclp.has_kss)
3206                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3207         else
3208                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3209
3210         if (vcpu->kvm->arch.use_cmma) {
3211                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3212                 if (rc)
3213                         return rc;
3214         }
3215         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3216         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3217
3218         vcpu->arch.sie_block->hpid = HPID_KVM;
3219
3220         kvm_s390_vcpu_crypto_setup(vcpu);
3221
3222         mutex_lock(&vcpu->kvm->lock);
3223         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3224                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3225                 if (rc)
3226                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3227         }
3228         mutex_unlock(&vcpu->kvm->lock);
3229
3230         return rc;
3231 }
3232
3233 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3234 {
3235         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3236                 return -EINVAL;
3237         return 0;
3238 }
3239
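/*
 * Allocate and initialize the vcpu's SIE block (including the GISA
 * origin and the set of registers synced via kvm_run); ucontrol VMs
 * additionally get a private gmap here.
 */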
3240 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3241 {
3242         struct sie_page *sie_page;
3243         int rc;
3244
3245         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3246         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3247         if (!sie_page)
3248                 return -ENOMEM;
3249
3250         vcpu->arch.sie_block = &sie_page->sie_block;
3251         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3252
3253         /* the real guest size will always be smaller than msl */
3254         vcpu->arch.sie_block->mso = 0;
3255         vcpu->arch.sie_block->msl = sclp.hamax;
3256
3257         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3258         spin_lock_init(&vcpu->arch.local_int.lock);
3259         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3260         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3261                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3262         seqcount_init(&vcpu->arch.cputm_seqcount);
3263
3264         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3265         kvm_clear_async_pf_completion_queue(vcpu);
3266         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3267                                     KVM_SYNC_GPRS |
3268                                     KVM_SYNC_ACRS |
3269                                     KVM_SYNC_CRS |
3270                                     KVM_SYNC_ARCH0 |
3271                                     KVM_SYNC_PFAULT;
3272         kvm_s390_set_prefix(vcpu, 0);
3273         if (test_kvm_facility(vcpu->kvm, 64))
3274                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3275         if (test_kvm_facility(vcpu->kvm, 82))
3276                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3277         if (test_kvm_facility(vcpu->kvm, 133))
3278                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3279         if (test_kvm_facility(vcpu->kvm, 156))
3280                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3281         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3282          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3283          */
3284         if (MACHINE_HAS_VX)
3285                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3286         else
3287                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3288
3289         if (kvm_is_ucontrol(vcpu->kvm)) {
3290                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3291                 if (rc)
3292                         goto out_free_sie_block;
3293         }
3294
3295         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3296                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3297         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3298
3299         rc = kvm_s390_vcpu_setup(vcpu);
3300         if (rc)
3301                 goto out_ucontrol_uninit;
3302         return 0;
3303
3304 out_ucontrol_uninit:
3305         if (kvm_is_ucontrol(vcpu->kvm))
3306                 gmap_remove(vcpu->arch.gmap);
3307 out_free_sie_block:
3308         free_page((unsigned long)(vcpu->arch.sie_block));
3309         return rc;
3310 }
3311
3312 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3313 {
3314         return kvm_s390_vcpu_has_irq(vcpu, 0);
3315 }
3316
3317 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3318 {
3319         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3320 }
3321
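/*
 * Prevent the vcpu from (re)entering SIE by setting PROG_BLOCK_SIE in
 * prog20 and kicking it out of a running SIE; kvm_s390_vcpu_unblock
 * clears the bit again.
 */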
3322 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3323 {
3324         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3325         exit_sie(vcpu);
3326 }
3327
3328 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3329 {
3330         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3331 }
3332
3333 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3334 {
3335         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3336         exit_sie(vcpu);
3337 }
3338
3339 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3340 {
3341         return atomic_read(&vcpu->arch.sie_block->prog20) &
3342                (PROG_BLOCK_SIE | PROG_REQUEST);
3343 }
3344
3345 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3346 {
3347         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3348 }
3349
3350 /*
3351  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3352  * If the CPU is not running (e.g. waiting as idle) the function will
3353  * return immediately. */
3354 void exit_sie(struct kvm_vcpu *vcpu)
3355 {
3356         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3357         kvm_s390_vsie_kick(vcpu);
3358         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3359                 cpu_relax();
3360 }
3361
3362 /* Kick a guest cpu out of SIE to process a request synchronously */
3363 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3364 {
3365         kvm_make_request(req, vcpu);
3366         kvm_s390_vcpu_request(vcpu);
3367 }
3368
3369 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3370                               unsigned long end)
3371 {
3372         struct kvm *kvm = gmap->private;
3373         struct kvm_vcpu *vcpu;
3374         unsigned long prefix;
3375         int i;
3376
3377         if (gmap_is_shadow(gmap))
3378                 return;
3379         if (start >= 1UL << 31)
3380                 /* We are only interested in prefix pages */
3381                 return;
3382         kvm_for_each_vcpu(i, vcpu, kvm) {
3383                 /* match against both prefix pages */
3384                 prefix = kvm_s390_get_prefix(vcpu);
3385                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3386                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3387                                    start, end);
3388                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3389                 }
3390         }
3391 }
3392
3393 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3394 {
3395         /* do not poll with more than halt_poll_max_steal percent of steal time */
3396         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3397             halt_poll_max_steal) {
3398                 vcpu->stat.halt_no_poll_steal++;
3399                 return true;
3400         }
3401         return false;
3402 }
3403
3404 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3405 {
3406         /* kvm common code refers to this, but never calls it */
3407         BUG();
3408         return 0;
3409 }
3410
3411 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3412                                            struct kvm_one_reg *reg)
3413 {
3414         int r = -EINVAL;
3415
3416         switch (reg->id) {
3417         case KVM_REG_S390_TODPR:
3418                 r = put_user(vcpu->arch.sie_block->todpr,
3419                              (u32 __user *)reg->addr);
3420                 break;
3421         case KVM_REG_S390_EPOCHDIFF:
3422                 r = put_user(vcpu->arch.sie_block->epoch,
3423                              (u64 __user *)reg->addr);
3424                 break;
3425         case KVM_REG_S390_CPU_TIMER:
3426                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3427                              (u64 __user *)reg->addr);
3428                 break;
3429         case KVM_REG_S390_CLOCK_COMP:
3430                 r = put_user(vcpu->arch.sie_block->ckc,
3431                              (u64 __user *)reg->addr);
3432                 break;
3433         case KVM_REG_S390_PFTOKEN:
3434                 r = put_user(vcpu->arch.pfault_token,
3435                              (u64 __user *)reg->addr);
3436                 break;
3437         case KVM_REG_S390_PFCOMPARE:
3438                 r = put_user(vcpu->arch.pfault_compare,
3439                              (u64 __user *)reg->addr);
3440                 break;
3441         case KVM_REG_S390_PFSELECT:
3442                 r = put_user(vcpu->arch.pfault_select,
3443                              (u64 __user *)reg->addr);
3444                 break;
3445         case KVM_REG_S390_PP:
3446                 r = put_user(vcpu->arch.sie_block->pp,
3447                              (u64 __user *)reg->addr);
3448                 break;
3449         case KVM_REG_S390_GBEA:
3450                 r = put_user(vcpu->arch.sie_block->gbea,
3451                              (u64 __user *)reg->addr);
3452                 break;
3453         default:
3454                 break;
3455         }
3456
3457         return r;
3458 }
3459
3460 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3461                                            struct kvm_one_reg *reg)
3462 {
3463         int r = -EINVAL;
3464         __u64 val;
3465
3466         switch (reg->id) {
3467         case KVM_REG_S390_TODPR:
3468                 r = get_user(vcpu->arch.sie_block->todpr,
3469                              (u32 __user *)reg->addr);
3470                 break;
3471         case KVM_REG_S390_EPOCHDIFF:
3472                 r = get_user(vcpu->arch.sie_block->epoch,
3473                              (u64 __user *)reg->addr);
3474                 break;
3475         case KVM_REG_S390_CPU_TIMER:
3476                 r = get_user(val, (u64 __user *)reg->addr);
3477                 if (!r)
3478                         kvm_s390_set_cpu_timer(vcpu, val);
3479                 break;
3480         case KVM_REG_S390_CLOCK_COMP:
3481                 r = get_user(vcpu->arch.sie_block->ckc,
3482                              (u64 __user *)reg->addr);
3483                 break;
3484         case KVM_REG_S390_PFTOKEN:
3485                 r = get_user(vcpu->arch.pfault_token,
3486                              (u64 __user *)reg->addr);
3487                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3488                         kvm_clear_async_pf_completion_queue(vcpu);
3489                 break;
3490         case KVM_REG_S390_PFCOMPARE:
3491                 r = get_user(vcpu->arch.pfault_compare,
3492                              (u64 __user *)reg->addr);
3493                 break;
3494         case KVM_REG_S390_PFSELECT:
3495                 r = get_user(vcpu->arch.pfault_select,
3496                              (u64 __user *)reg->addr);
3497                 break;
3498         case KVM_REG_S390_PP:
3499                 r = get_user(vcpu->arch.sie_block->pp,
3500                              (u64 __user *)reg->addr);
3501                 break;
3502         case KVM_REG_S390_GBEA:
3503                 r = get_user(vcpu->arch.sie_block->gbea,
3504                              (u64 __user *)reg->addr);
3505                 break;
3506         default:
3507                 break;
3508         }
3509
3510         return r;
3511 }
3512
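/*
 * Architected normal CPU reset: clear the RI bit, pfault state, the
 * runtime instrumentation control block and all local interrupts; the
 * cpu is also stopped unless userspace controls the cpu state.
 */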
3513 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3514 {
3515         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3516         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3517         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3518
3519         kvm_clear_async_pf_completion_queue(vcpu);
3520         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3521                 kvm_s390_vcpu_stop(vcpu);
3522         kvm_s390_clear_local_irqs(vcpu);
3523 }
3524
3525 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3526 {
3527         /* Initial reset is a superset of the normal reset */
3528         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3529
3530         /*
3531          * This equals the initial cpu reset in the POP, but we don't switch to ESA.
3532          * We not only reset the internal data, but also ...
3533          */
3534         vcpu->arch.sie_block->gpsw.mask = 0;
3535         vcpu->arch.sie_block->gpsw.addr = 0;
3536         kvm_s390_set_prefix(vcpu, 0);
3537         kvm_s390_set_cpu_timer(vcpu, 0);
3538         vcpu->arch.sie_block->ckc = 0;
3539         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3540         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3541         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3542
3543         /* ... the data in sync regs */
3544         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3545         vcpu->run->s.regs.ckc = 0;
3546         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3547         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3548         vcpu->run->psw_addr = 0;
3549         vcpu->run->psw_mask = 0;
3550         vcpu->run->s.regs.todpr = 0;
3551         vcpu->run->s.regs.cputm = 0;
3552         vcpu->run->s.regs.ckc = 0;
3553         vcpu->run->s.regs.pp = 0;
3554         vcpu->run->s.regs.gbea = 1;
3555         vcpu->run->s.regs.fpc = 0;
3556         /*
3557          * Do not reset these registers in the protected case, as some of
3558          * them are overlaid and they are not accessible in this case
3559          * anyway.
3560          */
3561         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3562                 vcpu->arch.sie_block->gbea = 1;
3563                 vcpu->arch.sie_block->pp = 0;
3564                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3565                 vcpu->arch.sie_block->todpr = 0;
3566         }
3567 }
3568
3569 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3570 {
3571         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3572
3573         /* Clear reset is a superset of the initial reset */
3574         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3575
3576         memset(&regs->gprs, 0, sizeof(regs->gprs));
3577         memset(&regs->vrs, 0, sizeof(regs->vrs));
3578         memset(&regs->acrs, 0, sizeof(regs->acrs));
3579         memset(&regs->gscb, 0, sizeof(regs->gscb));
3580
3581         regs->etoken = 0;
3582         regs->etoken_extension = 0;
3583 }
3584
3585 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3586 {
3587         vcpu_load(vcpu);
3588         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3589         vcpu_put(vcpu);
3590         return 0;
3591 }
3592
3593 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3594 {
3595         vcpu_load(vcpu);
3596         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3597         vcpu_put(vcpu);
3598         return 0;
3599 }
3600
3601 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3602                                   struct kvm_sregs *sregs)
3603 {
3604         vcpu_load(vcpu);
3605
3606         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3607         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3608
3609         vcpu_put(vcpu);
3610         return 0;
3611 }
3612
3613 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3614                                   struct kvm_sregs *sregs)
3615 {
3616         vcpu_load(vcpu);
3617
3618         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3619         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3620
3621         vcpu_put(vcpu);
3622         return 0;
3623 }
3624
3625 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3626 {
3627         int ret = 0;
3628
3629         vcpu_load(vcpu);
3630
3631         if (test_fp_ctl(fpu->fpc)) {
3632                 ret = -EINVAL;
3633                 goto out;
3634         }
3635         vcpu->run->s.regs.fpc = fpu->fpc;
3636         if (MACHINE_HAS_VX)
3637                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3638                                  (freg_t *) fpu->fprs);
3639         else
3640                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3641
3642 out:
3643         vcpu_put(vcpu);
3644         return ret;
3645 }
3646
3647 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3648 {
3649         vcpu_load(vcpu);
3650
3651         /* make sure we have the latest values */
3652         save_fpu_regs();
3653         if (MACHINE_HAS_VX)
3654                 convert_vx_to_fp((freg_t *) fpu->fprs,
3655                                  (__vector128 *) vcpu->run->s.regs.vrs);
3656         else
3657                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3658         fpu->fpc = vcpu->run->s.regs.fpc;
3659
3660         vcpu_put(vcpu);
3661         return 0;
3662 }
3663
3664 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3665 {
3666         int rc = 0;
3667
3668         if (!is_vcpu_stopped(vcpu))
3669                 rc = -EBUSY;
3670         else {
3671                 vcpu->run->psw_mask = psw.mask;
3672                 vcpu->run->psw_addr = psw.addr;
3673         }
3674         return rc;
3675 }
3676
3677 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3678                                   struct kvm_translation *tr)
3679 {
3680         return -EINVAL; /* not implemented yet */
3681 }
3682
3683 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3684                               KVM_GUESTDBG_USE_HW_BP | \
3685                               KVM_GUESTDBG_ENABLE)
3686
3687 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3688                                         struct kvm_guest_debug *dbg)
3689 {
3690         int rc = 0;
3691
3692         vcpu_load(vcpu);
3693
3694         vcpu->guest_debug = 0;
3695         kvm_s390_clear_bp_data(vcpu);
3696
3697         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3698                 rc = -EINVAL;
3699                 goto out;
3700         }
3701         if (!sclp.has_gpere) {
3702                 rc = -EINVAL;
3703                 goto out;
3704         }
3705
3706         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3707                 vcpu->guest_debug = dbg->control;
3708                 /* enforce guest PER */
3709                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3710
3711                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3712                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3713         } else {
3714                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3715                 vcpu->arch.guestdbg.last_bp = 0;
3716         }
3717
3718         if (rc) {
3719                 vcpu->guest_debug = 0;
3720                 kvm_s390_clear_bp_data(vcpu);
3721                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3722         }
3723
3724 out:
3725         vcpu_put(vcpu);
3726         return rc;
3727 }
3728
3729 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3730                                     struct kvm_mp_state *mp_state)
3731 {
3732         int ret;
3733
3734         vcpu_load(vcpu);
3735
3736         /* CHECK_STOP and LOAD are not supported yet */
3737         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3738                                       KVM_MP_STATE_OPERATING;
3739
3740         vcpu_put(vcpu);
3741         return ret;
3742 }
3743
3744 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3745                                     struct kvm_mp_state *mp_state)
3746 {
3747         int rc = 0;
3748
3749         vcpu_load(vcpu);
3750
3751         /* user space knows about this interface - let it control the state */
3752         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3753
3754         switch (mp_state->mp_state) {
3755         case KVM_MP_STATE_STOPPED:
3756                 rc = kvm_s390_vcpu_stop(vcpu);
3757                 break;
3758         case KVM_MP_STATE_OPERATING:
3759                 rc = kvm_s390_vcpu_start(vcpu);
3760                 break;
3761         case KVM_MP_STATE_LOAD:
3762                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3763                         rc = -ENXIO;
3764                         break;
3765                 }
3766                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3767                 break;
3768         case KVM_MP_STATE_CHECK_STOP:
3769                 fallthrough;    /* CHECK_STOP is not supported yet */
3770         default:
3771                 rc = -ENXIO;
3772         }
3773
3774         vcpu_put(vcpu);
3775         return rc;
3776 }
3777
3778 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3779 {
3780         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3781 }
3782
3783 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3784 {
3785 retry:
3786         kvm_s390_vcpu_request_handled(vcpu);
3787         if (!kvm_request_pending(vcpu))
3788                 return 0;
3789         /*
3790          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3791          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3792          * This ensures that the ipte instruction for this request has
3793          * already finished. We might race against a second unmapper that
3794          * wants to set the blocking bit. Let's just retry the request loop.
3795          */
3796         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3797                 int rc;
3798                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3799                                           kvm_s390_get_prefix(vcpu),
3800                                           PAGE_SIZE * 2, PROT_WRITE);
3801                 if (rc) {
3802                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3803                         return rc;
3804                 }
3805                 goto retry;
3806         }
3807
3808         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3809                 vcpu->arch.sie_block->ihcpu = 0xffff;
3810                 goto retry;
3811         }
3812
3813         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3814                 if (!ibs_enabled(vcpu)) {
3815                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3816                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3817                 }
3818                 goto retry;
3819         }
3820
3821         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3822                 if (ibs_enabled(vcpu)) {
3823                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3824                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3825                 }
3826                 goto retry;
3827         }
3828
3829         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3830                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3831                 goto retry;
3832         }
3833
3834         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3835                 /*
3836                  * Disable CMM virtualization; we will emulate the ESSA
3837                  * instruction manually, in order to provide additional
3838                  * functionalities needed for live migration.
3839                  */
3840                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3841                 goto retry;
3842         }
3843
3844         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3845                 /*
3846                  * Re-enable CMM virtualization if CMMA is available and
3847                  * CMM has been used.
3848                  */
3849                 if ((vcpu->kvm->arch.use_cmma) &&
3850                     (vcpu->kvm->mm->context.uses_cmm))
3851                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3852                 goto retry;
3853         }
3854
3855         /* nothing to do, just clear the request */
3856         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3857         /* we left the vsie handler, nothing to do, just clear the request */
3858         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3859
3860         return 0;
3861 }
3862
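/*
 * Set the guest TOD clock: compute the epoch (and the epoch index when
 * the multiple-epoch facility is available) relative to the host TOD and
 * propagate it to all vcpus while they are blocked.
 */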
3863 void kvm_s390_set_tod_clock(struct kvm *kvm,
3864                             const struct kvm_s390_vm_tod_clock *gtod)
3865 {
3866         struct kvm_vcpu *vcpu;
3867         struct kvm_s390_tod_clock_ext htod;
3868         int i;
3869
3870         mutex_lock(&kvm->lock);
3871         preempt_disable();
3872
3873         get_tod_clock_ext((char *)&htod);
3874
3875         kvm->arch.epoch = gtod->tod - htod.tod;
3876         kvm->arch.epdx = 0;
3877         if (test_kvm_facility(kvm, 139)) {
3878                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3879                 if (kvm->arch.epoch > gtod->tod)
3880                         kvm->arch.epdx -= 1;
3881         }
3882
3883         kvm_s390_vcpu_block_all(kvm);
3884         kvm_for_each_vcpu(i, vcpu, kvm) {
3885                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3886                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3887         }
3888
3889         kvm_s390_vcpu_unblock_all(kvm);
3890         preempt_enable();
3891         mutex_unlock(&kvm->lock);
3892 }
3893
3894 /**
3895  * kvm_arch_fault_in_page - fault-in guest page if necessary
3896  * @vcpu: The corresponding virtual cpu
3897  * @gpa: Guest physical address
3898  * @writable: Whether the page should be writable or not
3899  *
3900  * Make sure that a guest page has been faulted-in on the host.
3901  *
3902  * Return: Zero on success, negative error code otherwise.
3903  */
3904 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3905 {
3906         return gmap_fault(vcpu->arch.gmap, gpa,
3907                           writable ? FAULT_FLAG_WRITE : 0);
3908 }
3909
3910 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3911                                       unsigned long token)
3912 {
3913         struct kvm_s390_interrupt inti;
3914         struct kvm_s390_irq irq;
3915
3916         if (start_token) {
3917                 irq.u.ext.ext_params2 = token;
3918                 irq.type = KVM_S390_INT_PFAULT_INIT;
3919                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3920         } else {
3921                 inti.type = KVM_S390_INT_PFAULT_DONE;
3922                 inti.parm64 = token;
3923                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3924         }
3925 }
3926
3927 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3928                                      struct kvm_async_pf *work)
3929 {
3930         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3931         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3932 }
3933
3934 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3935                                  struct kvm_async_pf *work)
3936 {
3937         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3938         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3939 }
3940
3941 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3942                                struct kvm_async_pf *work)
3943 {
3944         /* s390 will always inject the page directly */
3945 }
3946
3947 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3948 {
3949         /*
3950          * s390 will always inject the page directly,
3951          * but we still want kvm_check_async_pf_completion() to clean up
3952          */
3953         return true;
3954 }
3955
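/*
 * Try to hand the current host fault to the async pfault machinery; this
 * is only done if the guest enabled pfault handling (valid token, matching
 * PSW mask bits, external interrupts and the service-signal subclass
 * allowed) and the gmap has pfault enabled.
 */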
3956 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3957 {
3958         hva_t hva;
3959         struct kvm_arch_async_pf arch;
3960         int rc;
3961
3962         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3963                 return 0;
3964         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3965             vcpu->arch.pfault_compare)
3966                 return 0;
3967         if (psw_extint_disabled(vcpu))
3968                 return 0;
3969         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3970                 return 0;
3971         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3972                 return 0;
3973         if (!vcpu->arch.gmap->pfault_enabled)
3974                 return 0;
3975
3976         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3977         hva += current->thread.gmap_addr & ~PAGE_MASK;
3978         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3979                 return 0;
3980
3981         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3982         return rc;
3983 }
3984
3985 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3986 {
3987         int rc, cpuflags;
3988
3989         /*
3990          * On s390 notifications for arriving pages will be delivered directly
3991          * to the guest but the housekeeping for completed pfaults is
3992          * handled outside the worker.
3993          */
3994         kvm_check_async_pf_completion(vcpu);
3995
3996         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3997         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3998
3999         if (need_resched())
4000                 schedule();
4001
4002         if (test_cpu_flag(CIF_MCCK_PENDING))
4003                 s390_handle_mcck();
4004
4005         if (!kvm_is_ucontrol(vcpu->kvm)) {
4006                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4007                 if (rc)
4008                         return rc;
4009         }
4010
4011         rc = kvm_s390_handle_requests(vcpu);
4012         if (rc)
4013                 return rc;
4014
4015         if (guestdbg_enabled(vcpu)) {
4016                 kvm_s390_backup_guest_per_regs(vcpu);
4017                 kvm_s390_patch_guest_per_regs(vcpu);
4018         }
4019
4020         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4021
4022         vcpu->arch.sie_block->icptcode = 0;
4023         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4024         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4025         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4026
4027         return 0;
4028 }
4029
4030 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4031 {
4032         struct kvm_s390_pgm_info pgm_info = {
4033                 .code = PGM_ADDRESSING,
4034         };
4035         u8 opcode, ilen;
4036         int rc;
4037
4038         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4039         trace_kvm_s390_sie_fault(vcpu);
4040
4041         /*
4042          * We want to inject an addressing exception, which is defined as a
4043          * suppressing or terminating exception. However, since we came here
4044          * by a DAT access exception, the PSW still points to the faulting
4045          * instruction since DAT exceptions are nullifying. So we've got
4046          * to look up the current opcode to get the length of the instruction
4047          * to be able to forward the PSW.
4048          */
4049         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4050         ilen = insn_length(opcode);
4051         if (rc < 0) {
4052                 return rc;
4053         } else if (rc) {
4054                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4055                  * Forward by arbitrary ilc, injection will take care of
4056                  * nullification if necessary.
4057                  */
4058                 pgm_info = vcpu->arch.pgm;
4059                 ilen = 4;
4060         }
4061         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4062         kvm_s390_forward_psw(vcpu, ilen);
4063         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4064 }
4065
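/*
 * Post-run processing: reinject host machine checks, hand SIE intercepts
 * to the intercept handlers, convert guest faults into async pfaults or a
 * synchronous fault-in, and report unhandled intercepts to userspace.
 */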
4066 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4067 {
4068         struct mcck_volatile_info *mcck_info;
4069         struct sie_page *sie_page;
4070
4071         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4072                    vcpu->arch.sie_block->icptcode);
4073         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4074
4075         if (guestdbg_enabled(vcpu))
4076                 kvm_s390_restore_guest_per_regs(vcpu);
4077
4078         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4079         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4080
4081         if (exit_reason == -EINTR) {
4082                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4083                 sie_page = container_of(vcpu->arch.sie_block,
4084                                         struct sie_page, sie_block);
4085                 mcck_info = &sie_page->mcck_info;
4086                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4087                 return 0;
4088         }
4089
4090         if (vcpu->arch.sie_block->icptcode > 0) {
4091                 int rc = kvm_handle_sie_intercept(vcpu);
4092
4093                 if (rc != -EOPNOTSUPP)
4094                         return rc;
4095                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4096                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4097                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4098                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4099                 return -EREMOTE;
4100         } else if (exit_reason != -EFAULT) {
4101                 vcpu->stat.exit_null++;
4102                 return 0;
4103         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4104                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4105                 vcpu->run->s390_ucontrol.trans_exc_code =
4106                                                 current->thread.gmap_addr;
4107                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4108                 return -EREMOTE;
4109         } else if (current->thread.gmap_pfault) {
4110                 trace_kvm_s390_major_guest_pfault(vcpu);
4111                 current->thread.gmap_pfault = 0;
4112                 if (kvm_arch_setup_async_pf(vcpu))
4113                         return 0;
4114                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4115         }
4116         return vcpu_post_run_fault_in_sie(vcpu);
4117 }
4118
4119 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4120 static int __vcpu_run(struct kvm_vcpu *vcpu)
4121 {
4122         int rc, exit_reason;
4123         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4124
4125         /*
4126          * We try to hold kvm->srcu during most of vcpu_run (except while
4127          * running the guest), so that memslots (and other stuff) are protected.
4128          */
4129         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4130
4131         do {
4132                 rc = vcpu_pre_run(vcpu);
4133                 if (rc)
4134                         break;
4135
4136                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4137                 /*
4138                  * As PF_VCPU will be used in the fault handler, there must be
4139                  * no uaccess between guest_enter and guest_exit.
4140                  */
4141                 local_irq_disable();
4142                 guest_enter_irqoff();
4143                 __disable_cpu_timer_accounting(vcpu);
4144                 local_irq_enable();
4145                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4146                         memcpy(sie_page->pv_grregs,
4147                                vcpu->run->s.regs.gprs,
4148                                sizeof(sie_page->pv_grregs));
4149                 }
4150                 exit_reason = sie64a(vcpu->arch.sie_block,
4151                                      vcpu->run->s.regs.gprs);
4152                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4153                         memcpy(vcpu->run->s.regs.gprs,
4154                                sie_page->pv_grregs,
4155                                sizeof(sie_page->pv_grregs));
4156                         /*
4157                          * We're not allowed to inject interrupts on intercepts
4158                          * that leave the guest state in an "in-between" state
4159                          * where the next SIE entry will do a continuation.
4160                          * Fence interrupts in our "internal" PSW.
4161                          */
4162                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4163                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4164                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4165                         }
4166                 }
4167                 local_irq_disable();
4168                 __enable_cpu_timer_accounting(vcpu);
4169                 guest_exit_irqoff();
4170                 local_irq_enable();
4171                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4172
4173                 rc = vcpu_post_run(vcpu, exit_reason);
4174         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4175
4176         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4177         return rc;
4178 }
4179
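/*
 * Sync the register state that only exists for non-protected (format 2)
 * guests from kvm_run into the SIE control block. This includes the lazy
 * enablement of runtime instrumentation and guarded storage when userspace
 * hands in valid control blocks (e.g. after migration).
 */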
4180 static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4181 {
4182         struct runtime_instr_cb *riccb;
4183         struct gs_cb *gscb;
4184
4185         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4186         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4187         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4188         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4189         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4190                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4191                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4192                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4193         }
4194         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4195                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4196                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4197                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4198                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4199                         kvm_clear_async_pf_completion_queue(vcpu);
4200         }
4201         /*
4202          * If userspace sets the riccb (e.g. after migration) to a valid state,
4203          * we should enable RI here instead of doing the lazy enablement.
4204          */
4205         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4206             test_kvm_facility(vcpu->kvm, 64) &&
4207             riccb->v &&
4208             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4209                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4210                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4211         }
4212         /*
4213          * If userspace sets the gscb (e.g. after migration) to non-zero,
4214          * we should enable GS here instead of doing the lazy enablement.
4215          */
4216         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4217             test_kvm_facility(vcpu->kvm, 133) &&
4218             gscb->gssm &&
4219             !vcpu->arch.gs_enabled) {
4220                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4221                 vcpu->arch.sie_block->ecb |= ECB_GS;
4222                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4223                 vcpu->arch.gs_enabled = 1;
4224         }
4225         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4226             test_kvm_facility(vcpu->kvm, 82)) {
4227                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4228                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4229         }
4230         if (MACHINE_HAS_GS) {
4231                 preempt_disable();
4232                 __ctl_set_bit(2, 4);
4233                 if (current->thread.gs_cb) {
4234                         vcpu->arch.host_gscb = current->thread.gs_cb;
4235                         save_gs_cb(vcpu->arch.host_gscb);
4236                 }
4237                 if (vcpu->arch.gs_enabled) {
4238                         current->thread.gs_cb = (struct gs_cb *)
4239                                                 &vcpu->run->s.regs.gscb;
4240                         restore_gs_cb(current->thread.gs_cb);
4241                 }
4242                 preempt_enable();
4243         }
4244         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4245 }
4246
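/*
 * Load the state that userspace marked dirty in kvm_run into the SIE
 * control block and the host registers (access and floating point/vector
 * registers) before entering SIE. For protected guests only the condition
 * code of the PSW is taken from userspace.
 */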
4247 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4248 {
4249         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4250                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4251         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4252                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4253                 /* some control register changes require a tlb flush */
4254                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4255         }
4256         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4257                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4258                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4259         }
4260         save_access_regs(vcpu->arch.host_acrs);
4261         restore_access_regs(vcpu->run->s.regs.acrs);
4262         /* save host (userspace) fprs/vrs */
4263         save_fpu_regs();
4264         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4265         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4266         if (MACHINE_HAS_VX)
4267                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4268         else
4269                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4270         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4271         if (test_fp_ctl(current->thread.fpu.fpc))
4272                 /* User space provided an invalid FPC, let's clear it */
4273                 current->thread.fpu.fpc = 0;
4274
4275         /* Sync fmt2 only data */
4276         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4277                 sync_regs_fmt2(vcpu, kvm_run);
4278         } else {
4279                 /*
4280                  * In several places we have to modify our internal view to
4281                  * not do things that are disallowed by the ultravisor. For
4282                  * example, we must not inject interrupts after specific exits
4283                  * (e.g. 112 prefix page not secure). We do this by turning
4284                  * off the machine check, external and I/O interrupt bits
4285                  * of our PSW copy. To avoid getting validity intercepts, we
4286                  * only accept the condition code from userspace.
4287                  */
4288                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4289                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4290                                                    PSW_MASK_CC;
4291         }
4292
4293         kvm_run->kvm_dirty_regs = 0;
4294 }
4295
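/*
 * Store the format 2 only state back into kvm_run and restore the host
 * guarded storage control block that was replaced in sync_regs_fmt2().
 */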
4296 static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4297 {
4298         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4299         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4300         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4301         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4302         if (MACHINE_HAS_GS) {
4303                 __ctl_set_bit(2, 4);
4304                 if (vcpu->arch.gs_enabled)
4305                         save_gs_cb(current->thread.gs_cb);
4306                 preempt_disable();
4307                 current->thread.gs_cb = vcpu->arch.host_gscb;
4308                 restore_gs_cb(vcpu->arch.host_gscb);
4309                 preempt_enable();
4310                 if (!vcpu->arch.host_gscb)
4311                         __ctl_clear_bit(2, 4);
4312                 vcpu->arch.host_gscb = NULL;
4313         }
4314         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4315 }
4316
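/*
 * Write the current guest register state back into kvm_run and restore the
 * host access and floating point registers after leaving SIE.
 */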
4317 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4318 {
4319         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4320         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4321         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4322         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4323         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4324         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4325         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4326         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4327         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4328         save_access_regs(vcpu->run->s.regs.acrs);
4329         restore_access_regs(vcpu->arch.host_acrs);
4330         /* Save guest register state */
4331         save_fpu_regs();
4332         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4333         /* Restore will be done lazily at return */
4334         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4335         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4336         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4337                 store_regs_fmt2(vcpu, kvm_run);
4338 }
4339
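/*
 * The KVM_RUN backend: validate the requested register synchronization,
 * auto-start the VCPU unless userspace controls the cpu state, sync the
 * dirty registers in, run the inner loop and store the state back.
 *
 * An illustrative, non-authoritative userspace sketch (vcpu_fd, psw_mask,
 * start_address, prefix and the helpers are placeholders; run is the
 * mmap()ed struct kvm_run of the VCPU):
 *
 *	run->psw_mask = psw_mask;
 *	run->psw_addr = start_address;
 *	run->s.regs.prefix = prefix;
 *	run->kvm_dirty_regs = KVM_SYNC_PREFIX;
 *	if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
 *		handle_error(errno);
 *	handle_exit(run->exit_reason);
 */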
4340 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4341 {
4342         int rc;
4343
4344         if (kvm_run->immediate_exit)
4345                 return -EINTR;
4346
4347         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4348             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4349                 return -EINVAL;
4350
4351         vcpu_load(vcpu);
4352
4353         if (guestdbg_exit_pending(vcpu)) {
4354                 kvm_s390_prepare_debug_exit(vcpu);
4355                 rc = 0;
4356                 goto out;
4357         }
4358
4359         kvm_sigset_activate(vcpu);
4360
4361         /*
4362          * no need to check the return value of vcpu_start as it can only
4363          * fail for protvirt, but protvirt implies user controlled cpu state
4364          */
4365         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4366                 kvm_s390_vcpu_start(vcpu);
4367         } else if (is_vcpu_stopped(vcpu)) {
4368                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4369                                    vcpu->vcpu_id);
4370                 rc = -EINVAL;
4371                 goto out;
4372         }
4373
4374         sync_regs(vcpu, kvm_run);
4375         enable_cpu_timer_accounting(vcpu);
4376
4377         might_fault();
4378         rc = __vcpu_run(vcpu);
4379
4380         if (signal_pending(current) && !rc) {
4381                 kvm_run->exit_reason = KVM_EXIT_INTR;
4382                 rc = -EINTR;
4383         }
4384
4385         if (guestdbg_exit_pending(vcpu) && !rc)  {
4386                 kvm_s390_prepare_debug_exit(vcpu);
4387                 rc = 0;
4388         }
4389
4390         if (rc == -EREMOTE) {
4391                 /* userspace support is needed, kvm_run has been prepared */
4392                 rc = 0;
4393         }
4394
4395         disable_cpu_timer_accounting(vcpu);
4396         store_regs(vcpu, kvm_run);
4397
4398         kvm_sigset_deactivate(vcpu);
4399
4400         vcpu->stat.exit_userspace++;
4401 out:
4402         vcpu_put(vcpu);
4403         return rc;
4404 }
4405
4406 /*
4407  * store status at address
4408  * we have two special cases:
4409  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4410  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4411  */
4412 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4413 {
4414         unsigned char archmode = 1;
4415         freg_t fprs[NUM_FPRS];
4416         unsigned int px;
4417         u64 clkcomp, cputm;
4418         int rc;
4419
4420         px = kvm_s390_get_prefix(vcpu);
4421         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4422                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4423                         return -EFAULT;
4424                 gpa = 0;
4425         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4426                 if (write_guest_real(vcpu, 163, &archmode, 1))
4427                         return -EFAULT;
4428                 gpa = px;
4429         } else
4430                 gpa -= __LC_FPREGS_SAVE_AREA;
4431
4432         /* manually convert vector registers if necessary */
4433         if (MACHINE_HAS_VX) {
4434                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4435                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4436                                      fprs, 128);
4437         } else {
4438                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4439                                      vcpu->run->s.regs.fprs, 128);
4440         }
4441         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4442                               vcpu->run->s.regs.gprs, 128);
4443         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4444                               &vcpu->arch.sie_block->gpsw, 16);
4445         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4446                               &px, 4);
4447         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4448                               &vcpu->run->s.regs.fpc, 4);
4449         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4450                               &vcpu->arch.sie_block->todpr, 4);
4451         cputm = kvm_s390_get_cpu_timer(vcpu);
4452         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4453                               &cputm, 8);
4454         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4455         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4456                               &clkcomp, 8);
4457         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4458                               &vcpu->run->s.regs.acrs, 64);
4459         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4460                               &vcpu->arch.sie_block->gcr, 128);
4461         return rc ? -EFAULT : 0;
4462 }
4463
4464 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4465 {
4466         /*
4467          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4468          * switch in the run ioctl. Let's update our copies before we save
4469          * them into the save area.
4470          */
4471         save_fpu_regs();
4472         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4473         save_access_regs(vcpu->run->s.regs.acrs);
4474
4475         return kvm_s390_store_status_unloaded(vcpu, addr);
4476 }
4477
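/*
 * IBS speeds up a VCPU that is the only one running. It is therefore only
 * enabled while a single started VCPU exists and is disabled on all VCPUs
 * as soon as a second one is started (see kvm_s390_vcpu_start/stop below).
 */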
4478 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4479 {
4480         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4481         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4482 }
4483
4484 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4485 {
4486         unsigned int i;
4487         struct kvm_vcpu *vcpu;
4488
4489         kvm_for_each_vcpu(i, vcpu, kvm) {
4490                 __disable_ibs_on_vcpu(vcpu);
4491         }
4492 }
4493
4494 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4495 {
4496         if (!sclp.has_ibs)
4497                 return;
4498         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4499         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4500 }
4501
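/*
 * Move the VCPU from the STOPPED to the OPERATING state, adjust the IBS
 * state accordingly and, for protected guests, report the state change to
 * the ultravisor before clearing CPUSTAT_STOPPED.
 */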
4502 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4503 {
4504         int i, online_vcpus, r = 0, started_vcpus = 0;
4505
4506         if (!is_vcpu_stopped(vcpu))
4507                 return 0;
4508
4509         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4510         /* Only one cpu at a time may enter/leave the STOPPED state. */
4511         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4512         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4513
4514         /* Let's tell the UV that we want to change into the operating state */
4515         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4516                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4517                 if (r) {
4518                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4519                         return r;
4520                 }
4521         }
4522
4523         for (i = 0; i < online_vcpus; i++) {
4524                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4525                         started_vcpus++;
4526         }
4527
4528         if (started_vcpus == 0) {
4529                 /* we're the only active VCPU -> speed it up */
4530                 __enable_ibs_on_vcpu(vcpu);
4531         } else if (started_vcpus == 1) {
4532                 /*
4533                  * As we are starting a second VCPU, we have to disable
4534                  * the IBS facility on all VCPUs to remove potentially
4535                  * outstanding ENABLE requests.
4536                  */
4537                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4538         }
4539
4540         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4541         /*
4542          * The real PSW might have changed due to a RESTART interpreted by the
4543          * ultravisor. We block all interrupts and let the next sie exit
4544          * refresh our view.
4545          */
4546         if (kvm_s390_pv_cpu_is_protected(vcpu))
4547                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4548         /*
4549          * Another VCPU might have used IBS while we were offline.
4550          * Let's play safe and flush the VCPU at startup.
4551          */
4552         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4553         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4554         return 0;
4555 }
4556
4557 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4558 {
4559         int i, online_vcpus, r = 0, started_vcpus = 0;
4560         struct kvm_vcpu *started_vcpu = NULL;
4561
4562         if (is_vcpu_stopped(vcpu))
4563                 return 0;
4564
4565         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4566         /* Only one cpu at a time may enter/leave the STOPPED state. */
4567         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4568         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4569
4570         /* Let's tell the UV that we want to change into the stopped state */
4571         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4572                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4573                 if (r) {
4574                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4575                         return r;
4576                 }
4577         }
4578
4579         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4580         kvm_s390_clear_stop_irq(vcpu);
4581
4582         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4583         __disable_ibs_on_vcpu(vcpu);
4584
4585         for (i = 0; i < online_vcpus; i++) {
4586                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4587                         started_vcpus++;
4588                         started_vcpu = vcpu->kvm->vcpus[i];
4589                 }
4590         }
4591
4592         if (started_vcpus == 1) {
4593                 /*
4594                  * As we only have one VCPU left, we want to enable the
4595                  * IBS facility for that VCPU to speed it up.
4596                  */
4597                 __enable_ibs_on_vcpu(started_vcpu);
4598         }
4599
4600         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4601         return 0;
4602 }
4603
4604 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4605                                      struct kvm_enable_cap *cap)
4606 {
4607         int r;
4608
4609         if (cap->flags)
4610                 return -EINVAL;
4611
4612         switch (cap->cap) {
4613         case KVM_CAP_S390_CSS_SUPPORT:
4614                 if (!vcpu->kvm->arch.css_support) {
4615                         vcpu->kvm->arch.css_support = 1;
4616                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4617                         trace_kvm_s390_enable_css(vcpu->kvm);
4618                 }
4619                 r = 0;
4620                 break;
4621         default:
4622                 r = -EINVAL;
4623                 break;
4624         }
4625         return r;
4626 }
4627
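/*
 * KVM_S390_MEMOP_SIDA_READ/WRITE: copy data between userspace and the
 * secure instruction data area (SIDA) of a protected guest. The offset
 * and size are validated against the SIDA size of the SIE control block.
 */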
4628 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4629                                    struct kvm_s390_mem_op *mop)
4630 {
4631         void __user *uaddr = (void __user *)mop->buf;
4632         int r = 0;
4633
4634         if (mop->flags || !mop->size)
4635                 return -EINVAL;
4636         if (mop->size + mop->sida_offset < mop->size)
4637                 return -EINVAL;
4638         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4639                 return -E2BIG;
4640
4641         switch (mop->op) {
4642         case KVM_S390_MEMOP_SIDA_READ:
4643                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4644                                  mop->sida_offset), mop->size))
4645                         r = -EFAULT;
4646
4647                 break;
4648         case KVM_S390_MEMOP_SIDA_WRITE:
4649                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4650                                    mop->sida_offset), uaddr, mop->size))
4651                         r = -EFAULT;
4652                 break;
4653         }
4654         return r;
4655 }
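
/*
 * KVM_S390_MEMOP_LOGICAL_READ/WRITE: access guest memory through the
 * logical (DAT translated) address space of the VCPU. With
 * KVM_S390_MEMOP_F_CHECK_ONLY only the accessibility is checked; with
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION a failing access injects the resulting
 * program exception into the guest. Not available for protected guests.
 *
 * An illustrative, non-authoritative userspace sketch (vcpu_fd, guest_addr
 * and buf are placeholders):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		handle_error(errno);
 */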
4656 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4657                                   struct kvm_s390_mem_op *mop)
4658 {
4659         void __user *uaddr = (void __user *)mop->buf;
4660         void *tmpbuf = NULL;
4661         int r = 0;
4662         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4663                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4664
4665         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4666                 return -EINVAL;
4667
4668         if (mop->size > MEM_OP_MAX_SIZE)
4669                 return -E2BIG;
4670
4671         if (kvm_s390_pv_cpu_is_protected(vcpu))
4672                 return -EINVAL;
4673
4674         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4675                 tmpbuf = vmalloc(mop->size);
4676                 if (!tmpbuf)
4677                         return -ENOMEM;
4678         }
4679
4680         switch (mop->op) {
4681         case KVM_S390_MEMOP_LOGICAL_READ:
4682                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4683                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4684                                             mop->size, GACC_FETCH);
4685                         break;
4686                 }
4687                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4688                 if (r == 0) {
4689                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4690                                 r = -EFAULT;
4691                 }
4692                 break;
4693         case KVM_S390_MEMOP_LOGICAL_WRITE:
4694                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4695                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4696                                             mop->size, GACC_STORE);
4697                         break;
4698                 }
4699                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4700                         r = -EFAULT;
4701                         break;
4702                 }
4703                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4704                 break;
4705         }
4706
4707         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4708                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4709
4710         vfree(tmpbuf);
4711         return r;
4712 }
4713
4714 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4715                                       struct kvm_s390_mem_op *mop)
4716 {
4717         int r, srcu_idx;
4718
4719         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4720
4721         switch (mop->op) {
4722         case KVM_S390_MEMOP_LOGICAL_READ:
4723         case KVM_S390_MEMOP_LOGICAL_WRITE:
4724                 r = kvm_s390_guest_mem_op(vcpu, mop);
4725                 break;
4726         case KVM_S390_MEMOP_SIDA_READ:
4727         case KVM_S390_MEMOP_SIDA_WRITE:
4728                 /* we are locked against sida going away by the vcpu->mutex */
4729                 r = kvm_s390_guest_sida_op(vcpu, mop);
4730                 break;
4731         default:
4732                 r = -EINVAL;
4733         }
4734
4735         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4736         return r;
4737 }
4738
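/*
 * Interrupt injection is handled on the "async" vcpu ioctl path, i.e.
 * without taking the vcpu mutex, so that userspace can inject an interrupt
 * into a currently running VCPU.
 */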
4739 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4740                                unsigned int ioctl, unsigned long arg)
4741 {
4742         struct kvm_vcpu *vcpu = filp->private_data;
4743         void __user *argp = (void __user *)arg;
4744
4745         switch (ioctl) {
4746         case KVM_S390_IRQ: {
4747                 struct kvm_s390_irq s390irq;
4748
4749                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4750                         return -EFAULT;
4751                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4752         }
4753         case KVM_S390_INTERRUPT: {
4754                 struct kvm_s390_interrupt s390int;
4755                 struct kvm_s390_irq s390irq = {};
4756
4757                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4758                         return -EFAULT;
4759                 if (s390int_to_s390irq(&s390int, &s390irq))
4760                         return -EINVAL;
4761                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4762         }
4763         }
4764         return -ENOIOCTLCMD;
4765 }
4766
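/*
 * All remaining VCPU ioctls are handled here with the vcpu mutex held by
 * the common KVM code and the vcpu loaded, so they cannot race with
 * KVM_RUN.
 */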
4767 long kvm_arch_vcpu_ioctl(struct file *filp,
4768                          unsigned int ioctl, unsigned long arg)
4769 {
4770         struct kvm_vcpu *vcpu = filp->private_data;
4771         void __user *argp = (void __user *)arg;
4772         int idx;
4773         long r;
4774         u16 rc, rrc;
4775
4776         vcpu_load(vcpu);
4777
4778         switch (ioctl) {
4779         case KVM_S390_STORE_STATUS:
4780                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4781                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4782                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4783                 break;
4784         case KVM_S390_SET_INITIAL_PSW: {
4785                 psw_t psw;
4786
4787                 r = -EFAULT;
4788                 if (copy_from_user(&psw, argp, sizeof(psw)))
4789                         break;
4790                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4791                 break;
4792         }
4793         case KVM_S390_CLEAR_RESET:
4794                 r = 0;
4795                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4796                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4797                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4798                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4799                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4800                                    rc, rrc);
4801                 }
4802                 break;
4803         case KVM_S390_INITIAL_RESET:
4804                 r = 0;
4805                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4806                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4807                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4808                                           UVC_CMD_CPU_RESET_INITIAL,
4809                                           &rc, &rrc);
4810                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4811                                    rc, rrc);
4812                 }
4813                 break;
4814         case KVM_S390_NORMAL_RESET:
4815                 r = 0;
4816                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4817                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4818                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4819                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4820                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4821                                    rc, rrc);
4822                 }
4823                 break;
4824         case KVM_SET_ONE_REG:
4825         case KVM_GET_ONE_REG: {
4826                 struct kvm_one_reg reg;
4827                 r = -EINVAL;
4828                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4829                         break;
4830                 r = -EFAULT;
4831                 if (copy_from_user(&reg, argp, sizeof(reg)))
4832                         break;
4833                 if (ioctl == KVM_SET_ONE_REG)
4834                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4835                 else
4836                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4837                 break;
4838         }
4839 #ifdef CONFIG_KVM_S390_UCONTROL
4840         case KVM_S390_UCAS_MAP: {
4841                 struct kvm_s390_ucas_mapping ucasmap;
4842
4843                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4844                         r = -EFAULT;
4845                         break;
4846                 }
4847
4848                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4849                         r = -EINVAL;
4850                         break;
4851                 }
4852
4853                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4854                                      ucasmap.vcpu_addr, ucasmap.length);
4855                 break;
4856         }
4857         case KVM_S390_UCAS_UNMAP: {
4858                 struct kvm_s390_ucas_mapping ucasmap;
4859
4860                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4861                         r = -EFAULT;
4862                         break;
4863                 }
4864
4865                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4866                         r = -EINVAL;
4867                         break;
4868                 }
4869
4870                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4871                         ucasmap.length);
4872                 break;
4873         }
4874 #endif
4875         case KVM_S390_VCPU_FAULT: {
4876                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4877                 break;
4878         }
4879         case KVM_ENABLE_CAP:
4880         {
4881                 struct kvm_enable_cap cap;
4882                 r = -EFAULT;
4883                 if (copy_from_user(&cap, argp, sizeof(cap)))
4884                         break;
4885                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4886                 break;
4887         }
4888         case KVM_S390_MEM_OP: {
4889                 struct kvm_s390_mem_op mem_op;
4890
4891                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4892                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4893                 else
4894                         r = -EFAULT;
4895                 break;
4896         }
4897         case KVM_S390_SET_IRQ_STATE: {
4898                 struct kvm_s390_irq_state irq_state;
4899
4900                 r = -EFAULT;
4901                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4902                         break;
4903                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4904                     irq_state.len == 0 ||
4905                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4906                         r = -EINVAL;
4907                         break;
4908                 }
4909                 /* do not use irq_state.flags, it will break old QEMUs */
4910                 r = kvm_s390_set_irq_state(vcpu,
4911                                            (void __user *) irq_state.buf,
4912                                            irq_state.len);
4913                 break;
4914         }
4915         case KVM_S390_GET_IRQ_STATE: {
4916                 struct kvm_s390_irq_state irq_state;
4917
4918                 r = -EFAULT;
4919                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4920                         break;
4921                 if (irq_state.len == 0) {
4922                         r = -EINVAL;
4923                         break;
4924                 }
4925                 /* do not use irq_state.flags, it will break old QEMUs */
4926                 r = kvm_s390_get_irq_state(vcpu,
4927                                            (__u8 __user *)  irq_state.buf,
4928                                            irq_state.len);
4929                 break;
4930         }
4931         default:
4932                 r = -ENOTTY;
4933         }
4934
4935         vcpu_put(vcpu);
4936         return r;
4937 }
4938
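/*
 * For user controlled VMs the SIE control block can be mapped by userspace
 * at KVM_S390_SIE_PAGE_OFFSET of the vcpu fd; all other faults on the vcpu
 * mapping get SIGBUS.
 */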
4939 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4940 {
4941 #ifdef CONFIG_KVM_S390_UCONTROL
4942         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4943                  && (kvm_is_ucontrol(vcpu->kvm))) {
4944                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4945                 get_page(vmf->page);
4946                 return 0;
4947         }
4948 #endif
4949         return VM_FAULT_SIGBUS;
4950 }
4951
4952 /* Section: memory related */
4953 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4954                                    struct kvm_memory_slot *memslot,
4955                                    const struct kvm_userspace_memory_region *mem,
4956                                    enum kvm_mr_change change)
4957 {
4958         /* A few sanity checks. Memory slots have to start and end at a
4959            segment boundary (1 MB). The memory in userland may be fragmented
4960            into various different vmas. It is okay to mmap() and munmap()
4961            parts of this slot at any time after this call. */
4962
4963         if (mem->userspace_addr & 0xffffful)
4964                 return -EINVAL;
4965
4966         if (mem->memory_size & 0xffffful)
4967                 return -EINVAL;
4968
4969         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4970                 return -EINVAL;
4971
4972         /* When we are protected, we should not change the memory slots */
4973         if (kvm_s390_pv_get_handle(kvm))
4974                 return -EINVAL;
4975         return 0;
4976 }
4977
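/*
 * Propagate the memslot change to the guest address space (gmap): deleted
 * and moved slots are unmapped, created and moved slots are (re)mapped at
 * their guest physical address.
 */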
4978 void kvm_arch_commit_memory_region(struct kvm *kvm,
4979                                 const struct kvm_userspace_memory_region *mem,
4980                                 struct kvm_memory_slot *old,
4981                                 const struct kvm_memory_slot *new,
4982                                 enum kvm_mr_change change)
4983 {
4984         int rc = 0;
4985
4986         switch (change) {
4987         case KVM_MR_DELETE:
4988                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4989                                         old->npages * PAGE_SIZE);
4990                 break;
4991         case KVM_MR_MOVE:
4992                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4993                                         old->npages * PAGE_SIZE);
4994                 if (rc)
4995                         break;
4996                 fallthrough;
4997         case KVM_MR_CREATE:
4998                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4999                                       mem->guest_phys_addr, mem->memory_size);
5000                 break;
5001         case KVM_MR_FLAGS_ONLY:
5002                 break;
5003         default:
5004                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5005         }
5006         if (rc)
5007                 pr_warn("failed to commit memory region\n");
5008         return;
5009 }
5010
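/*
 * sclp.hmfai packs one 2-bit field per facility doubleword, with field i
 * occupying the i-th bit pair counted from the most significant bit of the
 * 32-bit value. Each increment of that field removes another 16 facility
 * bits from the mask that kvm_s390_init() below ANDs with the host STFLE
 * list: a field value of 0 keeps 48 bits, 1 keeps 32, 2 keeps 16, 3 none.
 */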
5011 static inline unsigned long nonhyp_mask(int i)
5012 {
5013         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5014
5015         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5016 }
5017
5018 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5019 {
5020         vcpu->valid_wakeup = false;
5021 }
5022
5023 static int __init kvm_s390_init(void)
5024 {
5025         int i;
5026
5027         if (!sclp.has_sief2) {
5028                 pr_info("SIE is not available\n");
5029                 return -ENODEV;
5030         }
5031
5032         if (nested && hpage) {
5033                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5034                 return -EINVAL;
5035         }
5036
5037         for (i = 0; i < 16; i++)
5038                 kvm_s390_fac_base[i] |=
5039                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5040
5041         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5042 }
5043
5044 static void __exit kvm_s390_exit(void)
5045 {
5046         kvm_exit();
5047 }
5048
5049 module_init(kvm_s390_init);
5050 module_exit(kvm_s390_exit);
5051
5052 /*
5053  * Enable autoloading of the kvm module.
5054  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5055  * since x86 takes a different approach.
5056  */
5057 #include <linux/miscdevice.h>
5058 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5059 MODULE_ALIAS("devname:kvm");