// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_io_request", VCPU_STAT(exit_io_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_pei", VCPU_STAT(exit_pei) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
        { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
        { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
        { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
        { "halt_wakeup", VCPU_STAT(halt_wakeup) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "instruction_stctl", VCPU_STAT(instruction_stctl) },
        { "instruction_stctg", VCPU_STAT(instruction_stctg) },
        { "deliver_ckc", VCPU_STAT(deliver_ckc) },
        { "deliver_cputm", VCPU_STAT(deliver_cputm) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio", VCPU_STAT(deliver_virtio) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program", VCPU_STAT(deliver_program) },
        { "deliver_io", VCPU_STAT(deliver_io) },
        { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "inject_ckc", VCPU_STAT(inject_ckc) },
        { "inject_cputm", VCPU_STAT(inject_cputm) },
        { "inject_external_call", VCPU_STAT(inject_external_call) },
        { "inject_float_mchk", VM_STAT(inject_float_mchk) },
        { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
        { "inject_io", VM_STAT(inject_io) },
        { "inject_mchk", VCPU_STAT(inject_mchk) },
        { "inject_pfault_done", VM_STAT(inject_pfault_done) },
        { "inject_program", VCPU_STAT(inject_program) },
        { "inject_restart", VCPU_STAT(inject_restart) },
        { "inject_service_signal", VM_STAT(inject_service_signal) },
        { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
        { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
        { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
        { "inject_virtio", VM_STAT(inject_virtio) },
        { "instruction_epsw", VCPU_STAT(instruction_epsw) },
        { "instruction_gs", VCPU_STAT(instruction_gs) },
        { "instruction_io_other", VCPU_STAT(instruction_io_other) },
        { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
        { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_ptff", VCPU_STAT(instruction_ptff) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_sck", VCPU_STAT(instruction_sck) },
        { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_iske", VCPU_STAT(instruction_iske) },
        { "instruction_ri", VCPU_STAT(instruction_ri) },
        { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
        { "instruction_sske", VCPU_STAT(instruction_sske) },
        { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
        { "instruction_essa", VCPU_STAT(instruction_essa) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tb", VCPU_STAT(instruction_tb) },
        { "instruction_tpi", VCPU_STAT(instruction_tpi) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_tsch", VCPU_STAT(instruction_tsch) },
        { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
        { "instruction_sie", VCPU_STAT(instruction_sie) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
        { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
        { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
        { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
        { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
        { "instruction_diag_10", VCPU_STAT(diagnose_10) },
        { "instruction_diag_44", VCPU_STAT(diagnose_44) },
        { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
        { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
        { "instruction_diag_258", VCPU_STAT(diagnose_258) },
        { "instruction_diag_308", VCPU_STAT(diagnose_308) },
        { "instruction_diag_500", VCPU_STAT(diagnose_500) },
        { "instruction_diag_other", VCPU_STAT(diagnose_other) },
        { NULL }
};

struct kvm_s390_tod_clock_ext {
        __u8 epoch_idx;
        __u64 tod;
        __u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa  = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go
 * beyond this, code changes are required, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                sizeof(S390_lowcore.stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
        return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

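/*
 * Adjust one SIE control block for a host TOD clock change: add -delta to
 * the epoch so the guest-observed TOD stays constant, and propagate the
 * carry/borrow into the epoch index when the multiple-epoch facility
 * (ECD_MEF) is in use.
 */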
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta, we have to compensate this by adding
         * -delta to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

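/*
 * Test whether a single PERFORM LOCKED OPERATION function code is
 * available: execute PLO with the "test bit" indication (0x100) in GR0
 * and report availability via condition code 0.
 */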
static inline int plo_test_bit(unsigned char nr)
{
        register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
        int cc;

        asm volatile(
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : "d" (r0)
                : "cc");
        return cc == 0;
}

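/*
 * Run the query function (function code 0 in GR0) of the instruction
 * identified by @opcode and store the returned availability bitmap at
 * @query, which is addressed via GR1.
 */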
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
        register unsigned long r0 asm("0") = 0; /* query function */
        register unsigned long r1 asm("1") = (unsigned long) query;

        asm volatile(
                /* Parameter regs are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
                :
                : "d" (r0), "a" (r1), [opc] "i" (opcode)
                : "cc", "memory");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

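/*
 * Probe the host for the instruction subfunctions (PLO, PTFF, CPACF,
 * SORTL, DFLTCC) and SIE features that KVM can offer to guests. The
 * vSIE related features are only advertised if nested virtualization is
 * enabled and the required SIE/ESOP/64-bit-SCAO/IDTE support is present.
 */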
static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (test_facility(155)) /* MSA9 */
                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kdsa);

        if (test_facility(150)) /* SORTL */
                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

        if (test_facility(151)) /* DFLTCC */
                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages being detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        int rc = -ENOMEM;

        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf_uv)
                goto out;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
            debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
                goto out;

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
        if (rc) {
                pr_err("A FLIC registration call failed with rc=%d\n", rc);
                goto out;
        }

        rc = kvm_s390_gib_init(GAL_ISC);
        if (rc)
                goto out;

        return 0;

out:
        kvm_arch_exit();
        return rc;
}

void kvm_arch_exit(void)
{
        kvm_s390_gib_destroy();
        debug_unregister(kvm_s390_dbf);
        debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

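/*
 * Report whether a KVM capability is supported. For some extensions the
 * returned value is a limit instead of a boolean, e.g. the maximum
 * KVM_S390_MEM_OP size or the number of supported VCPUs.
 */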
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
                r = 1;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
        case KVM_CAP_MAX_VCPU_ID:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        case KVM_CAP_S390_PROTECTED:
                r = is_prot_virt_host();
                break;
        default:
                r = 0;
        }
        return r;
}

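/*
 * Transfer dirty bits from the gmap to the KVM dirty bitmap of a memory
 * slot. The slot is scanned in segment-sized chunks of _PAGE_ENTRIES
 * pages; the loop bails out on a fatal signal and reschedules between
 * chunks.
 */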
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

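/*
 * Enable an optional VM capability. The facility based capabilities
 * (vector registers, RI, AIS, GS, 1M huge page backing) can only be
 * enabled as long as no VCPU has been created.
 */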
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        if (test_facility(148)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
                                set_kvm_facility(kvm->arch.model.fac_list, 148);
                        }
                        if (test_facility(152)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
                                set_kvm_facility(kvm->arch.model.fac_list, 152);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;
                        down_write(&kvm->mm->mmap_sem);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        up_write(&kvm->mm->mmap_sem);
                        /*
                         * We might have to create fake 4k page
                         * tables. To avoid that the hardware works on
                         * stale PGSTEs, we emulate these instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

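/* Read a KVM_S390_VM_MEM_CTRL attribute; only the memory limit can be queried. */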
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

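/*
 * Set a KVM_S390_VM_MEM_CTRL attribute: enable CMMA, reset the CMMA state
 * or change the guest memory limit. Enabling CMMA and changing the limit
 * are only possible before any VCPU has been created.
 */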
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* Not compatible with cmma. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

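/*
 * Re-initialize the crypto control blocks of all VCPUs after a crypto
 * attribute change: block every VCPU, rerun the per-VCPU crypto setup and
 * force a VSIE exit so the shadow CRYCBs are recreated, then unblock.
 */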
void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                /* recreate the shadow crycb by leaving the VSIE handler */
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

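/*
 * Handle the KVM_S390_VM_CRYPTO attribute group: enable or disable AES/DEA
 * protected key wrapping (generating respectively clearing the wrapping
 * key masks) and AP instruction interpretation, then propagate the new
 * setup to all VCPUs.
 */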
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 0;
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                ms = slots->memslots + slotnr;
                if (!ms->dirty_bitmap)
                        return -EINVAL;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

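/*
 * Read the current guest TOD clock: add the VM's epoch (and, with the
 * multiple-epoch facility, the epoch index) to the host TOD value.
 * Preemption is disabled so host TOD and epoch are read consistently.
 */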
static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_s390_tod_clock_ext htod;

        preempt_disable();

        get_tod_clock_ext((char *)&htod);

        gtod->tod = htod.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
                if (gtod->tod < htod.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

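/*
 * Set the guest CPU model (cpuid, IBC value and facility list) from user
 * space. The IBC value is clamped to the host supported range and the
 * model can only be changed before any VCPU has been created.
 */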
1245 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1246 {
1247         struct kvm_s390_vm_cpu_processor *proc;
1248         u16 lowest_ibc, unblocked_ibc;
1249         int ret = 0;
1250
1251         mutex_lock(&kvm->lock);
1252         if (kvm->created_vcpus) {
1253                 ret = -EBUSY;
1254                 goto out;
1255         }
1256         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1257         if (!proc) {
1258                 ret = -ENOMEM;
1259                 goto out;
1260         }
1261         if (!copy_from_user(proc, (void __user *)attr->addr,
1262                             sizeof(*proc))) {
1263                 kvm->arch.model.cpuid = proc->cpuid;
1264                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1265                 unblocked_ibc = sclp.ibc & 0xfff;
1266                 if (lowest_ibc && proc->ibc) {
1267                         if (proc->ibc > unblocked_ibc)
1268                                 kvm->arch.model.ibc = unblocked_ibc;
1269                         else if (proc->ibc < lowest_ibc)
1270                                 kvm->arch.model.ibc = lowest_ibc;
1271                         else
1272                                 kvm->arch.model.ibc = proc->ibc;
1273                 }
1274                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1275                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1276                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1277                          kvm->arch.model.ibc,
1278                          kvm->arch.model.cpuid);
1279                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1280                          kvm->arch.model.fac_list[0],
1281                          kvm->arch.model.fac_list[1],
1282                          kvm->arch.model.fac_list[2]);
1283         } else
1284                 ret = -EFAULT;
1285         kfree(proc);
1286 out:
1287         mutex_unlock(&kvm->lock);
1288         return ret;
1289 }
1290
1291 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1292                                        struct kvm_device_attr *attr)
1293 {
1294         struct kvm_s390_vm_cpu_feat data;
1295
1296         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1297                 return -EFAULT;
1298         if (!bitmap_subset((unsigned long *) data.feat,
1299                            kvm_s390_available_cpu_feat,
1300                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1301                 return -EINVAL;
1302
1303         mutex_lock(&kvm->lock);
1304         if (kvm->created_vcpus) {
1305                 mutex_unlock(&kvm->lock);
1306                 return -EBUSY;
1307         }
1308         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1309                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1310         mutex_unlock(&kvm->lock);
1311         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1312                          data.feat[0],
1313                          data.feat[1],
1314                          data.feat[2]);
1315         return 0;
1316 }
1317
1318 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1319                                           struct kvm_device_attr *attr)
1320 {
1321         mutex_lock(&kvm->lock);
1322         if (kvm->created_vcpus) {
1323                 mutex_unlock(&kvm->lock);
1324                 return -EBUSY;
1325         }
1326
1327         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1328                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1329                 mutex_unlock(&kvm->lock);
1330                 return -EFAULT;
1331         }
1332         mutex_unlock(&kvm->lock);
1333
1334         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1335                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1338                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1339         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1342         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1345         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1348         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1351         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1354         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1357         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1360         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1363         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1366         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1369         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1375         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1378         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1381         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1384                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1386         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1387                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1391
1392         return 0;
1393 }
1394
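/*
 * Dispatcher for KVM_SET_DEVICE_ATTR on the KVM_S390_VM_CPU_MODEL group.
 * Only the "processor" (guest) view can be written; unknown attributes
 * return -ENXIO. A minimal userspace sketch (vm_fd, cpuid and ibc are
 * placeholders, error handling omitted):
 *
 *	struct kvm_s390_vm_cpu_processor proc = {
 *		.cpuid = cpuid,
 *		.ibc   = ibc,
 *	};
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_PROCESSOR,
 *		.addr  = (__u64)(unsigned long)&proc,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * As with kvm_s390_set_processor_subfunc() above, the setters only work
 * before the first vcpu has been created.
 */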
1395 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1396 {
1397         int ret = -ENXIO;
1398
1399         switch (attr->attr) {
1400         case KVM_S390_VM_CPU_PROCESSOR:
1401                 ret = kvm_s390_set_processor(kvm, attr);
1402                 break;
1403         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1404                 ret = kvm_s390_set_processor_feat(kvm, attr);
1405                 break;
1406         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1407                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1408                 break;
1409         }
1410         return ret;
1411 }
1412
1413 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1414 {
1415         struct kvm_s390_vm_cpu_processor *proc;
1416         int ret = 0;
1417
1418         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1419         if (!proc) {
1420                 ret = -ENOMEM;
1421                 goto out;
1422         }
1423         proc->cpuid = kvm->arch.model.cpuid;
1424         proc->ibc = kvm->arch.model.ibc;
1425         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1426                S390_ARCH_FAC_LIST_SIZE_BYTE);
1427         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1428                  kvm->arch.model.ibc,
1429                  kvm->arch.model.cpuid);
1430         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1431                  kvm->arch.model.fac_list[0],
1432                  kvm->arch.model.fac_list[1],
1433                  kvm->arch.model.fac_list[2]);
1434         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1435                 ret = -EFAULT;
1436         kfree(proc);
1437 out:
1438         return ret;
1439 }
1440
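/*
 * The "machine" view reports what the host can offer: the real CPU id,
 * the IBC value from SCLP, the facility mask KVM applies and the raw host
 * facility list (STFLE). It is the read-only counterpart to the guest
 * "processor" view returned by kvm_s390_get_processor() above.
 */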
1441 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1442 {
1443         struct kvm_s390_vm_cpu_machine *mach;
1444         int ret = 0;
1445
1446         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1447         if (!mach) {
1448                 ret = -ENOMEM;
1449                 goto out;
1450         }
1451         get_cpu_id((struct cpuid *) &mach->cpuid);
1452         mach->ibc = sclp.ibc;
1453         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1454                S390_ARCH_FAC_LIST_SIZE_BYTE);
1455         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1456                sizeof(S390_lowcore.stfle_fac_list));
1457         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1458                  kvm->arch.model.ibc,
1459                  kvm->arch.model.cpuid);
1460         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1461                  mach->fac_mask[0],
1462                  mach->fac_mask[1],
1463                  mach->fac_mask[2]);
1464         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1465                  mach->fac_list[0],
1466                  mach->fac_list[1],
1467                  mach->fac_list[2]);
1468         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1469                 ret = -EFAULT;
1470         kfree(mach);
1471 out:
1472         return ret;
1473 }
1474
1475 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1476                                        struct kvm_device_attr *attr)
1477 {
1478         struct kvm_s390_vm_cpu_feat data;
1479
1480         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1481                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1482         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1483                 return -EFAULT;
1484         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1485                          data.feat[0],
1486                          data.feat[1],
1487                          data.feat[2]);
1488         return 0;
1489 }
1490
1491 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1492                                      struct kvm_device_attr *attr)
1493 {
1494         struct kvm_s390_vm_cpu_feat data;
1495
1496         bitmap_copy((unsigned long *) data.feat,
1497                     kvm_s390_available_cpu_feat,
1498                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1499         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1500                 return -EFAULT;
1501         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1502                          data.feat[0],
1503                          data.feat[1],
1504                          data.feat[2]);
1505         return 0;
1506 }
1507
1508 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1509                                           struct kvm_device_attr *attr)
1510 {
1511         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1512             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1513                 return -EFAULT;
1514
1515         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1516                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1519                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1520         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1523         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1526         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1529         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1532         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1535         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1538         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1541         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1544         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1547         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1550         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1556         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1559         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1562         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1567         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1568                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1572
1573         return 0;
1574 }
1575
1576 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1577                                         struct kvm_device_attr *attr)
1578 {
1579         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1580             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1581                 return -EFAULT;
1582
1583         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1584                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1585                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1587                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1588         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1589                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1590                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1591         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1593                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1594         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1596                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1597         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1598                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1599                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1600         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1603         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1604                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1605                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1606         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1607                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1608                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1609         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1610                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1611                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1612         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1613                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1615         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1618         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1624         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1625                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1626                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1627         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1628                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1630         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1631                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1632                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1633                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1634                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1635         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1636                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1637                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1639                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1640
1641         return 0;
1642 }
1643
1644 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1645 {
1646         int ret = -ENXIO;
1647
1648         switch (attr->attr) {
1649         case KVM_S390_VM_CPU_PROCESSOR:
1650                 ret = kvm_s390_get_processor(kvm, attr);
1651                 break;
1652         case KVM_S390_VM_CPU_MACHINE:
1653                 ret = kvm_s390_get_machine(kvm, attr);
1654                 break;
1655         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1656                 ret = kvm_s390_get_processor_feat(kvm, attr);
1657                 break;
1658         case KVM_S390_VM_CPU_MACHINE_FEAT:
1659                 ret = kvm_s390_get_machine_feat(kvm, attr);
1660                 break;
1661         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1662                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1663                 break;
1664         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1665                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1666                 break;
1667         }
1668         return ret;
1669 }
1670
1671 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673         int ret;
1674
1675         switch (attr->group) {
1676         case KVM_S390_VM_MEM_CTRL:
1677                 ret = kvm_s390_set_mem_control(kvm, attr);
1678                 break;
1679         case KVM_S390_VM_TOD:
1680                 ret = kvm_s390_set_tod(kvm, attr);
1681                 break;
1682         case KVM_S390_VM_CPU_MODEL:
1683                 ret = kvm_s390_set_cpu_model(kvm, attr);
1684                 break;
1685         case KVM_S390_VM_CRYPTO:
1686                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1687                 break;
1688         case KVM_S390_VM_MIGRATION:
1689                 ret = kvm_s390_vm_set_migration(kvm, attr);
1690                 break;
1691         default:
1692                 ret = -ENXIO;
1693                 break;
1694         }
1695
1696         return ret;
1697 }
1698
1699 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1700 {
1701         int ret;
1702
1703         switch (attr->group) {
1704         case KVM_S390_VM_MEM_CTRL:
1705                 ret = kvm_s390_get_mem_control(kvm, attr);
1706                 break;
1707         case KVM_S390_VM_TOD:
1708                 ret = kvm_s390_get_tod(kvm, attr);
1709                 break;
1710         case KVM_S390_VM_CPU_MODEL:
1711                 ret = kvm_s390_get_cpu_model(kvm, attr);
1712                 break;
1713         case KVM_S390_VM_MIGRATION:
1714                 ret = kvm_s390_vm_get_migration(kvm, attr);
1715                 break;
1716         default:
1717                 ret = -ENXIO;
1718                 break;
1719         }
1720
1721         return ret;
1722 }
1723
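/*
 * KVM_HAS_DEVICE_ATTR probe: returns 0 when the group/attr pair is
 * supported on this host and -ENXIO when it is not. A hedged userspace
 * sketch (vm_fd is a placeholder):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_APIE,
 *	};
 *	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *		apie_supported = true;
 *
 * (apie_supported is a hypothetical flag; ENABLE_APIE is only reported
 * when AP instructions are available, see below.)
 */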
1724 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1725 {
1726         int ret;
1727
1728         switch (attr->group) {
1729         case KVM_S390_VM_MEM_CTRL:
1730                 switch (attr->attr) {
1731                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1732                 case KVM_S390_VM_MEM_CLR_CMMA:
1733                         ret = sclp.has_cmma ? 0 : -ENXIO;
1734                         break;
1735                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1736                         ret = 0;
1737                         break;
1738                 default:
1739                         ret = -ENXIO;
1740                         break;
1741                 }
1742                 break;
1743         case KVM_S390_VM_TOD:
1744                 switch (attr->attr) {
1745                 case KVM_S390_VM_TOD_LOW:
1746                 case KVM_S390_VM_TOD_HIGH:
1747                         ret = 0;
1748                         break;
1749                 default:
1750                         ret = -ENXIO;
1751                         break;
1752                 }
1753                 break;
1754         case KVM_S390_VM_CPU_MODEL:
1755                 switch (attr->attr) {
1756                 case KVM_S390_VM_CPU_PROCESSOR:
1757                 case KVM_S390_VM_CPU_MACHINE:
1758                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1759                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1760                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1761                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1762                         ret = 0;
1763                         break;
1764                 default:
1765                         ret = -ENXIO;
1766                         break;
1767                 }
1768                 break;
1769         case KVM_S390_VM_CRYPTO:
1770                 switch (attr->attr) {
1771                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1772                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1773                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1774                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1775                         ret = 0;
1776                         break;
1777                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1778                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1779                         ret = ap_instructions_available() ? 0 : -ENXIO;
1780                         break;
1781                 default:
1782                         ret = -ENXIO;
1783                         break;
1784                 }
1785                 break;
1786         case KVM_S390_VM_MIGRATION:
1787                 ret = 0;
1788                 break;
1789         default:
1790                 ret = -ENXIO;
1791                 break;
1792         }
1793
1794         return ret;
1795 }
1796
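/*
 * Read guest storage keys into a userspace buffer (KVM_S390_GET_SKEYS).
 * If the guest never enabled storage keys, KVM_S390_GET_SKEYS_NONE is
 * returned and nothing is copied; otherwise one key byte per guest page,
 * starting at start_gfn, is written to skeydata_addr.
 */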
1797 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1798 {
1799         uint8_t *keys;
1800         uint64_t hva;
1801         int srcu_idx, i, r = 0;
1802
1803         if (args->flags != 0)
1804                 return -EINVAL;
1805
1806         /* Is this guest using storage keys? */
1807         if (!mm_uses_skeys(current->mm))
1808                 return KVM_S390_GET_SKEYS_NONE;
1809
1810         /* Enforce sane limit on memory allocation */
1811         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1812                 return -EINVAL;
1813
1814         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1815         if (!keys)
1816                 return -ENOMEM;
1817
1818         down_read(&current->mm->mmap_sem);
1819         srcu_idx = srcu_read_lock(&kvm->srcu);
1820         for (i = 0; i < args->count; i++) {
1821                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1822                 if (kvm_is_error_hva(hva)) {
1823                         r = -EFAULT;
1824                         break;
1825                 }
1826
1827                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1828                 if (r)
1829                         break;
1830         }
1831         srcu_read_unlock(&kvm->srcu, srcu_idx);
1832         up_read(&current->mm->mmap_sem);
1833
1834         if (!r) {
1835                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1836                                  sizeof(uint8_t) * args->count);
1837                 if (r)
1838                         r = -EFAULT;
1839         }
1840
1841         kvfree(keys);
1842         return r;
1843 }
1844
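/*
 * Write guest storage keys from a userspace buffer (KVM_S390_SET_SKEYS).
 * Storage key handling is enabled lazily via s390_enable_skey(); a page
 * that is not yet writable in the host is faulted in with
 * fixup_user_fault() and the key update is retried.
 */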
1845 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1846 {
1847         uint8_t *keys;
1848         uint64_t hva;
1849         int srcu_idx, i, r = 0;
1850         bool unlocked;
1851
1852         if (args->flags != 0)
1853                 return -EINVAL;
1854
1855         /* Enforce sane limit on memory allocation */
1856         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1857                 return -EINVAL;
1858
1859         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1860         if (!keys)
1861                 return -ENOMEM;
1862
1863         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1864                            sizeof(uint8_t) * args->count);
1865         if (r) {
1866                 r = -EFAULT;
1867                 goto out;
1868         }
1869
1870         /* Enable storage key handling for the guest */
1871         r = s390_enable_skey();
1872         if (r)
1873                 goto out;
1874
1875         i = 0;
1876         down_read(&current->mm->mmap_sem);
1877         srcu_idx = srcu_read_lock(&kvm->srcu);
1878         while (i < args->count) {
1879                 unlocked = false;
1880                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1881                 if (kvm_is_error_hva(hva)) {
1882                         r = -EFAULT;
1883                         break;
1884                 }
1885
1886                 /* Lowest order bit is reserved */
1887                 if (keys[i] & 0x01) {
1888                         r = -EINVAL;
1889                         break;
1890                 }
1891
1892                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1893                 if (r) {
1894                         r = fixup_user_fault(current, current->mm, hva,
1895                                              FAULT_FLAG_WRITE, &unlocked);
1896                         if (r)
1897                                 break;
1898                 }
1899                 if (!r)
1900                         i++;
1901         }
1902         srcu_read_unlock(&kvm->srcu, srcu_idx);
1903         up_read(&current->mm->mmap_sem);
1904 out:
1905         kvfree(keys);
1906         return r;
1907 }
1908
1909 /*
1910  * Base address and length must be sent at the start of each block, therefore
1911  * it's cheaper to send some clean data, as long as the clean run is shorter
1912  * than two longs (16 bytes on 64-bit s390).
1913  */
1914 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1915 /* for consistency */
1916 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1917
1918 /*
1919  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1920  * address falls in a hole. In that case the index of one of the memslots
1921  * bordering the hole is returned.
1922  */
1923 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1924 {
1925         int start = 0, end = slots->used_slots;
1926         int slot = atomic_read(&slots->lru_slot);
1927         struct kvm_memory_slot *memslots = slots->memslots;
1928
1929         if (gfn >= memslots[slot].base_gfn &&
1930             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1931                 return slot;
1932
1933         while (start < end) {
1934                 slot = start + (end - start) / 2;
1935
1936                 if (gfn >= memslots[slot].base_gfn)
1937                         end = slot;
1938                 else
1939                         start = slot + 1;
1940         }
1941
1942         if (gfn >= memslots[start].base_gfn &&
1943             gfn < memslots[start].base_gfn + memslots[start].npages) {
1944                 atomic_set(&slots->lru_slot, start);
1945         }
1946
1947         return start;
1948 }
1949
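/*
 * Peek mode: report the CMMA values (taken from the PGSTEs) for a linear
 * range of guest frames without consuming any dirty bits, so repeated
 * peeks over the same range are side-effect free.
 */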
1950 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1951                               u8 *res, unsigned long bufsize)
1952 {
1953         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1954
1955         args->count = 0;
1956         while (args->count < bufsize) {
1957                 hva = gfn_to_hva(kvm, cur_gfn);
1958                 /*
1959                  * We return an error if the first value was invalid, but we
1960                  * return successfully if at least one value was copied.
1961                  */
1962                 if (kvm_is_error_hva(hva))
1963                         return args->count ? 0 : -EFAULT;
1964                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1965                         pgstev = 0;
1966                 res[args->count++] = (pgstev >> 24) & 0x43;
1967                 cur_gfn++;
1968         }
1969
1970         return 0;
1971 }
1972
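/*
 * Return the guest frame number of the next page whose CMMA dirty bit is
 * set, starting the search at cur_gfn. The search continues across
 * memslot boundaries toward higher guest addresses (the memslot array is
 * kept sorted by descending base_gfn) and wraps around once it runs past
 * the top of guest memory.
 */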
1973 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1974                                               unsigned long cur_gfn)
1975 {
1976         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1977         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1978         unsigned long ofs = cur_gfn - ms->base_gfn;
1979
1980         if (ms->base_gfn + ms->npages <= cur_gfn) {
1981                 slotidx--;
1982                 /* If we are above the highest slot, wrap around */
1983                 if (slotidx < 0)
1984                         slotidx = slots->used_slots - 1;
1985
1986                 ms = slots->memslots + slotidx;
1987                 ofs = 0;
1988         }
1989         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1990         while ((slotidx > 0) && (ofs >= ms->npages)) {
1991                 slotidx--;
1992                 ms = slots->memslots + slotidx;
1993                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1994         }
1995         return ms->base_gfn + ofs;
1996 }
1997
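/*
 * Migration mode: like peek, but only ranges containing dirty pages are
 * reported and the dirty bits are consumed. The walk stops when the
 * buffer is full, when the end of guest memory is reached, or when the
 * next dirty page is more than KVM_S390_MAX_BIT_DISTANCE away.
 */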
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999                              u8 *res, unsigned long bufsize)
2000 {
2001         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002         struct kvm_memslots *slots = kvm_memslots(kvm);
2003         struct kvm_memory_slot *ms;
2004
2005         if (unlikely(!slots->used_slots))
2006                 return 0;
2007
2008         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009         ms = gfn_to_memslot(kvm, cur_gfn);
2010         args->count = 0;
2011         args->start_gfn = cur_gfn;
2012         if (!ms)
2013                 return 0;
2014         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2015         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2016
2017         while (args->count < bufsize) {
2018                 hva = gfn_to_hva(kvm, cur_gfn);
2019                 if (kvm_is_error_hva(hva))
2020                         return 0;
2021                 /* Decrement only if we actually flipped the bit to 0 */
2022                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2025                         pgstev = 0;
2026                 /* Save the value */
2027                 res[args->count++] = (pgstev >> 24) & 0x43;
2028                 /* If the next bit is too far away, stop. */
2029                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2030                         return 0;
2031                 /* If we reached the previous "next", find the next one */
2032                 if (cur_gfn == next_gfn)
2033                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034                 /* Reached the end of memory or of the buffer, stop */
2035                 if ((next_gfn >= mem_end) ||
2036                     (next_gfn - args->start_gfn >= bufsize))
2037                         return 0;
2038                 cur_gfn++;
2039                 /* Reached the end of the current memslot, take the next one. */
2040                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2041                         ms = gfn_to_memslot(kvm, cur_gfn);
2042                         if (!ms)
2043                                 return 0;
2044                 }
2045         }
2046         return 0;
2047 }
2048
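/*
 * A hedged userspace sketch of the ioctl served below (vm_fd, buf and
 * buf_len are placeholders, error handling omitted):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = buf_len,
 *		.flags     = KVM_S390_CMMA_PEEK,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On return, log.count holds the number of values actually written and
 * log.remaining the number of dirty pages still pending (when migration
 * mode is enabled).
 */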
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058                                   struct kvm_s390_cmma_log *args)
2059 {
2060         unsigned long bufsize;
2061         int srcu_idx, peek, ret;
2062         u8 *values;
2063
2064         if (!kvm->arch.use_cmma)
2065                 return -ENXIO;
2066         /* Invalid/unsupported flags were specified */
2067         if (args->flags & ~KVM_S390_CMMA_PEEK)
2068                 return -EINVAL;
2069         /* Migration mode query, and we are not doing a migration */
2070         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071         if (!peek && !kvm->arch.migration_mode)
2072                 return -EINVAL;
2073         /* CMMA is disabled or was not used, or the buffer has length zero */
2074         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075         if (!bufsize || !kvm->mm->context.uses_cmm) {
2076                 memset(args, 0, sizeof(*args));
2077                 return 0;
2078         }
2079         /* We are not peeking, and there are no dirty pages */
2080         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081                 memset(args, 0, sizeof(*args));
2082                 return 0;
2083         }
2084
2085         values = vmalloc(bufsize);
2086         if (!values)
2087                 return -ENOMEM;
2088
2089         down_read(&kvm->mm->mmap_sem);
2090         srcu_idx = srcu_read_lock(&kvm->srcu);
2091         if (peek)
2092                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093         else
2094                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095         srcu_read_unlock(&kvm->srcu, srcu_idx);
2096         up_read(&kvm->mm->mmap_sem);
2097
2098         if (kvm->arch.migration_mode)
2099                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100         else
2101                 args->remaining = 0;
2102
2103         if (copy_to_user((void __user *)args->values, values, args->count))
2104                 ret = -EFAULT;
2105
2106         vfree(values);
2107         return ret;
2108 }
2109
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116                                   const struct kvm_s390_cmma_log *args)
2117 {
2118         unsigned long hva, mask, pgstev, i;
2119         uint8_t *bits;
2120         int srcu_idx, r = 0;
2121
2122         mask = args->mask;
2123
2124         if (!kvm->arch.use_cmma)
2125                 return -ENXIO;
2126         /* invalid/unsupported flags */
2127         if (args->flags != 0)
2128                 return -EINVAL;
2129         /* Enforce sane limit on memory allocation */
2130         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131                 return -EINVAL;
2132         /* Nothing to do */
2133         if (args->count == 0)
2134                 return 0;
2135
2136         bits = vmalloc(array_size(sizeof(*bits), args->count));
2137         if (!bits)
2138                 return -ENOMEM;
2139
2140         r = copy_from_user(bits, (void __user *)args->values, args->count);
2141         if (r) {
2142                 r = -EFAULT;
2143                 goto out;
2144         }
2145
2146         down_read(&kvm->mm->mmap_sem);
2147         srcu_idx = srcu_read_lock(&kvm->srcu);
2148         for (i = 0; i < args->count; i++) {
2149                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2150                 if (kvm_is_error_hva(hva)) {
2151                         r = -EFAULT;
2152                         break;
2153                 }
2154
2155                 pgstev = bits[i];
2156                 pgstev = pgstev << 24;
2157                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159         }
2160         srcu_read_unlock(&kvm->srcu, srcu_idx);
2161         up_read(&kvm->mm->mmap_sem);
2162
2163         if (!kvm->mm->context.uses_cmm) {
2164                 down_write(&kvm->mm->mmap_sem);
2165                 kvm->mm->context.uses_cmm = 1;
2166                 up_write(&kvm->mm->mmap_sem);
2167         }
2168 out:
2169         vfree(bits);
2170         return r;
2171 }
2172
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175         struct kvm_vcpu *vcpu;
2176         u16 rc, rrc;
2177         int ret = 0;
2178         int i;
2179
2180         /*
2181          * We ignore failures and try to destroy as many CPUs as possible.
2182          * At the same time we must not free the assigned resources when
2183          * this fails, as the ultravisor still has access to that memory.
2184          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185          * behind.
2186          * We want to return the first failure rc and rrc, though.
2187          */
2188         kvm_for_each_vcpu(i, vcpu, kvm) {
2189                 mutex_lock(&vcpu->mutex);
2190                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191                         *rcp = rc;
2192                         *rrcp = rrc;
2193                         ret = -EIO;
2194                 }
2195                 mutex_unlock(&vcpu->mutex);
2196         }
2197         return ret;
2198 }
2199
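/*
 * Convert all existing vcpus to protected mode. If one conversion fails,
 * the vcpus converted so far are rolled back via kvm_s390_cpus_from_pv()
 * and the error is returned.
 */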
2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2201 {
2202         int i, r = 0;
2203         u16 dummy;
2204
2205         struct kvm_vcpu *vcpu;
2206
2207         kvm_for_each_vcpu(i, vcpu, kvm) {
2208                 mutex_lock(&vcpu->mutex);
2209                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2210                 mutex_unlock(&vcpu->mutex);
2211                 if (r)
2212                         break;
2213         }
2214         if (r)
2215                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2216         return r;
2217 }
2218
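/*
 * Backend for KVM_S390_PV_COMMAND. The usual lifecycle driven by
 * userspace is roughly:
 *   KVM_PV_ENABLE         create the protected configuration and convert
 *                         all vcpus (the VM is switched to ESCA first),
 *   KVM_PV_SET_SEC_PARMS  hand the SE header of the encrypted image to
 *                         the ultravisor,
 *   KVM_PV_UNPACK         unpack (decrypt) the image into guest memory,
 *   KVM_PV_VERIFY         have the ultravisor verify the image before
 *                         the guest is started.
 * KVM_PV_DISABLE tears the protected configuration down again, while
 * KVM_PV_PREP_RESET and KVM_PV_UNSHARE_ALL support the reset paths of a
 * protected guest.
 */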
2219 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2220 {
2221         int r = 0;
2222         u16 dummy;
2223         void __user *argp = (void __user *)cmd->data;
2224
2225         switch (cmd->cmd) {
2226         case KVM_PV_ENABLE: {
2227                 r = -EINVAL;
2228                 if (kvm_s390_pv_is_protected(kvm))
2229                         break;
2230
2231                 /*
2232                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2233                  *  esca, we need no cleanup in the error cases below
2234                  */
2235                 r = sca_switch_to_extended(kvm);
2236                 if (r)
2237                         break;
2238
2239                 down_write(&current->mm->mmap_sem);
2240                 r = gmap_mark_unmergeable();
2241                 up_write(&current->mm->mmap_sem);
2242                 if (r)
2243                         break;
2244
2245                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2246                 if (r)
2247                         break;
2248
2249                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2250                 if (r)
2251                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2252
2253                 /* we need to block service interrupts from now on */
2254                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2255                 break;
2256         }
2257         case KVM_PV_DISABLE: {
2258                 r = -EINVAL;
2259                 if (!kvm_s390_pv_is_protected(kvm))
2260                         break;
2261
2262                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2263                 /*
2264                  * If a CPU could not be destroyed, destroy VM will also fail.
2265                  * There is no point in trying to destroy it. Instead return
2266                  * the rc and rrc from the first CPU that failed destroying.
2267                  */
2268                 if (r)
2269                         break;
2270                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2271
2272                 /* no need to block service interrupts any more */
2273                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2274                 break;
2275         }
2276         case KVM_PV_SET_SEC_PARMS: {
2277                 struct kvm_s390_pv_sec_parm parms = {};
2278                 void *hdr;
2279
2280                 r = -EINVAL;
2281                 if (!kvm_s390_pv_is_protected(kvm))
2282                         break;
2283
2284                 r = -EFAULT;
2285                 if (copy_from_user(&parms, argp, sizeof(parms)))
2286                         break;
2287
2288                 /* Currently restricted to 8KB */
2289                 r = -EINVAL;
2290                 if (parms.length > PAGE_SIZE * 2)
2291                         break;
2292
2293                 r = -ENOMEM;
2294                 hdr = vmalloc(parms.length);
2295                 if (!hdr)
2296                         break;
2297
2298                 r = -EFAULT;
2299                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2300                                     parms.length))
2301                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2302                                                       &cmd->rc, &cmd->rrc);
2303
2304                 vfree(hdr);
2305                 break;
2306         }
2307         case KVM_PV_UNPACK: {
2308                 struct kvm_s390_pv_unp unp = {};
2309
2310                 r = -EINVAL;
2311                 if (!kvm_s390_pv_is_protected(kvm))
2312                         break;
2313
2314                 r = -EFAULT;
2315                 if (copy_from_user(&unp, argp, sizeof(unp)))
2316                         break;
2317
2318                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2319                                        &cmd->rc, &cmd->rrc);
2320                 break;
2321         }
2322         case KVM_PV_VERIFY: {
2323                 r = -EINVAL;
2324                 if (!kvm_s390_pv_is_protected(kvm))
2325                         break;
2326
2327                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2328                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2329                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2330                              cmd->rrc);
2331                 break;
2332         }
2333         case KVM_PV_PREP_RESET: {
2334                 r = -EINVAL;
2335                 if (!kvm_s390_pv_is_protected(kvm))
2336                         break;
2337
2338                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2339                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2340                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2341                              cmd->rc, cmd->rrc);
2342                 break;
2343         }
2344         case KVM_PV_UNSHARE_ALL: {
2345                 r = -EINVAL;
2346                 if (!kvm_s390_pv_is_protected(kvm))
2347                         break;
2348
2349                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2350                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2351                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2352                              cmd->rc, cmd->rrc);
2353                 break;
2354         }
2355         default:
2356                 r = -ENOTTY;
2357         }
2358         return r;
2359 }
2360
2361 long kvm_arch_vm_ioctl(struct file *filp,
2362                        unsigned int ioctl, unsigned long arg)
2363 {
2364         struct kvm *kvm = filp->private_data;
2365         void __user *argp = (void __user *)arg;
2366         struct kvm_device_attr attr;
2367         int r;
2368
2369         switch (ioctl) {
2370         case KVM_S390_INTERRUPT: {
2371                 struct kvm_s390_interrupt s390int;
2372
2373                 r = -EFAULT;
2374                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2375                         break;
2376                 r = kvm_s390_inject_vm(kvm, &s390int);
2377                 break;
2378         }
2379         case KVM_CREATE_IRQCHIP: {
2380                 struct kvm_irq_routing_entry routing;
2381
2382                 r = -EINVAL;
2383                 if (kvm->arch.use_irqchip) {
2384                         /* Set up dummy routing. */
2385                         memset(&routing, 0, sizeof(routing));
2386                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2387                 }
2388                 break;
2389         }
2390         case KVM_SET_DEVICE_ATTR: {
2391                 r = -EFAULT;
2392                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2393                         break;
2394                 r = kvm_s390_vm_set_attr(kvm, &attr);
2395                 break;
2396         }
2397         case KVM_GET_DEVICE_ATTR: {
2398                 r = -EFAULT;
2399                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2400                         break;
2401                 r = kvm_s390_vm_get_attr(kvm, &attr);
2402                 break;
2403         }
2404         case KVM_HAS_DEVICE_ATTR: {
2405                 r = -EFAULT;
2406                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2407                         break;
2408                 r = kvm_s390_vm_has_attr(kvm, &attr);
2409                 break;
2410         }
2411         case KVM_S390_GET_SKEYS: {
2412                 struct kvm_s390_skeys args;
2413
2414                 r = -EFAULT;
2415                 if (copy_from_user(&args, argp,
2416                                    sizeof(struct kvm_s390_skeys)))
2417                         break;
2418                 r = kvm_s390_get_skeys(kvm, &args);
2419                 break;
2420         }
2421         case KVM_S390_SET_SKEYS: {
2422                 struct kvm_s390_skeys args;
2423
2424                 r = -EFAULT;
2425                 if (copy_from_user(&args, argp,
2426                                    sizeof(struct kvm_s390_skeys)))
2427                         break;
2428                 r = kvm_s390_set_skeys(kvm, &args);
2429                 break;
2430         }
2431         case KVM_S390_GET_CMMA_BITS: {
2432                 struct kvm_s390_cmma_log args;
2433
2434                 r = -EFAULT;
2435                 if (copy_from_user(&args, argp, sizeof(args)))
2436                         break;
2437                 mutex_lock(&kvm->slots_lock);
2438                 r = kvm_s390_get_cmma_bits(kvm, &args);
2439                 mutex_unlock(&kvm->slots_lock);
2440                 if (!r) {
2441                         r = copy_to_user(argp, &args, sizeof(args));
2442                         if (r)
2443                                 r = -EFAULT;
2444                 }
2445                 break;
2446         }
2447         case KVM_S390_SET_CMMA_BITS: {
2448                 struct kvm_s390_cmma_log args;
2449
2450                 r = -EFAULT;
2451                 if (copy_from_user(&args, argp, sizeof(args)))
2452                         break;
2453                 mutex_lock(&kvm->slots_lock);
2454                 r = kvm_s390_set_cmma_bits(kvm, &args);
2455                 mutex_unlock(&kvm->slots_lock);
2456                 break;
2457         }
2458         case KVM_S390_PV_COMMAND: {
2459                 struct kvm_pv_cmd args;
2460
2461                 /* protvirt implies user-controlled sigp (cpu state) */
2462                 kvm->arch.user_cpu_state_ctrl = 1;
2463                 r = 0;
2464                 if (!is_prot_virt_host()) {
2465                         r = -EINVAL;
2466                         break;
2467                 }
2468                 if (copy_from_user(&args, argp, sizeof(args))) {
2469                         r = -EFAULT;
2470                         break;
2471                 }
2472                 if (args.flags) {
2473                         r = -EINVAL;
2474                         break;
2475                 }
2476                 mutex_lock(&kvm->lock);
2477                 r = kvm_s390_handle_pv(kvm, &args);
2478                 mutex_unlock(&kvm->lock);
2479                 if (copy_to_user(argp, &args, sizeof(args))) {
2480                         r = -EFAULT;
2481                         break;
2482                 }
2483                 break;
2484         }
2485         default:
2486                 r = -ENOTTY;
2487         }
2488
2489         return r;
2490 }
2491
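/*
 * Query the AP configuration (QCI) to learn whether the AP extended
 * addressing (APXA) facility is installed. Returns 0 when AP instructions
 * are unavailable or the query fails.
 */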
2492 static int kvm_s390_apxa_installed(void)
2493 {
2494         struct ap_config_info info;
2495
2496         if (ap_instructions_available()) {
2497                 if (ap_qci(&info) == 0)
2498                         return info.apxa;
2499         }
2500
2501         return 0;
2502 }
2503
2504 /*
2505  * The format of the crypto control block (CRYCB) is specified in the 3 low
2506  * order bits of the CRYCB designation (CRYCBD) field as follows:
2507  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2508  *           AP extended addressing (APXA) facility is installed.
2509  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2510  * Format 2: Both the APXA and MSAX3 facilities are installed.
2511  */
2512 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2513 {
2514         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2515
2516         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2517         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2518
2519         /* Check whether MSAX3 is installed */
2520         if (!test_kvm_facility(kvm, 76))
2521                 return;
2522
2523         if (kvm_s390_apxa_installed())
2524                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2525         else
2526                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2527 }
2528
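/*
 * Install the AP matrix for this guest: apm selects adapters, aqm the
 * usage domains (queues) and adm the control domains. All vcpus are
 * blocked while the CRYCB is rewritten and are then told to rebuild
 * their shadow CRYCB via KVM_REQ_VSIE_RESTART. Exported for the vfio_ap
 * mediated device driver.
 */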
2529 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2530                                unsigned long *aqm, unsigned long *adm)
2531 {
2532         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2533
2534         mutex_lock(&kvm->lock);
2535         kvm_s390_vcpu_block_all(kvm);
2536
2537         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2538         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2539                 memcpy(crycb->apcb1.apm, apm, 32);
2540                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2541                          apm[0], apm[1], apm[2], apm[3]);
2542                 memcpy(crycb->apcb1.aqm, aqm, 32);
2543                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2544                          aqm[0], aqm[1], aqm[2], aqm[3]);
2545                 memcpy(crycb->apcb1.adm, adm, 32);
2546                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2547                          adm[0], adm[1], adm[2], adm[3]);
2548                 break;
2549         case CRYCB_FORMAT1:
2550         case CRYCB_FORMAT0: /* Fall through, both use APCB0 */
2551                 memcpy(crycb->apcb0.apm, apm, 8);
2552                 memcpy(crycb->apcb0.aqm, aqm, 2);
2553                 memcpy(crycb->apcb0.adm, adm, 2);
2554                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2555                          apm[0], *((unsigned short *)aqm),
2556                          *((unsigned short *)adm));
2557                 break;
2558         default:        /* Cannot happen */
2559                 break;
2560         }
2561
2562         /* recreate the shadow crycb for each vcpu */
2563         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2564         kvm_s390_vcpu_unblock_all(kvm);
2565         mutex_unlock(&kvm->lock);
2566 }
2567 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2568
2569 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2570 {
2571         mutex_lock(&kvm->lock);
2572         kvm_s390_vcpu_block_all(kvm);
2573
2574         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2575                sizeof(kvm->arch.crypto.crycb->apcb0));
2576         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2577                sizeof(kvm->arch.crypto.crycb->apcb1));
2578
2579         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2580         /* recreate the shadow crycb for each vcpu */
2581         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2582         kvm_s390_vcpu_unblock_all(kvm);
2583         mutex_unlock(&kvm->lock);
2584 }
2585 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2586
2587 static u64 kvm_s390_get_initial_cpuid(void)
2588 {
2589         struct cpuid cpuid;
2590
2591         get_cpu_id(&cpuid);
2592         cpuid.version = 0xff;
2593         return *((u64 *) &cpuid);
2594 }
2595
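/*
 * Set up the guest crypto control block: pick the CRYCB format and, if
 * the MSAX3 facility (76) is available, enable AES and DEA protected-key
 * functions with freshly generated random wrapping key masks.
 */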
2596 static void kvm_s390_crypto_init(struct kvm *kvm)
2597 {
2598         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2599         kvm_s390_set_crycb_format(kvm);
2600
2601         if (!test_kvm_facility(kvm, 76))
2602                 return;
2603
2604         /* Enable AES/DEA protected key functions by default */
2605         kvm->arch.crypto.aes_kw = 1;
2606         kvm->arch.crypto.dea_kw = 1;
2607         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2608                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2609         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2610                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2611 }
2612
2613 static void sca_dispose(struct kvm *kvm)
2614 {
2615         if (kvm->arch.use_esca)
2616                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2617         else
2618                 free_page((unsigned long)(kvm->arch.sca));
2619         kvm->arch.sca = NULL;
2620 }
2621
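/*
 * VM-wide initialization: allocate the basic SCA (staggered within its
 * page via sca_offset), the s390 debug feature, sie_page2 with the guest
 * facility lists, the crypto control block and, for non-ucontrol guests,
 * the gmap that bounds guest memory at mem_limit.
 */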
2622 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2623 {
2624         gfp_t alloc_flags = GFP_KERNEL;
2625         int i, rc;
2626         char debug_name[16];
2627         static unsigned long sca_offset;
2628
2629         rc = -EINVAL;
2630 #ifdef CONFIG_KVM_S390_UCONTROL
2631         if (type & ~KVM_VM_S390_UCONTROL)
2632                 goto out_err;
2633         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2634                 goto out_err;
2635 #else
2636         if (type)
2637                 goto out_err;
2638 #endif
2639
2640         rc = s390_enable_sie();
2641         if (rc)
2642                 goto out_err;
2643
2644         rc = -ENOMEM;
2645
2646         if (!sclp.has_64bscao)
2647                 alloc_flags |= GFP_DMA;
2648         rwlock_init(&kvm->arch.sca_lock);
2649         /* start with basic SCA */
2650         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2651         if (!kvm->arch.sca)
2652                 goto out_err;
2653         mutex_lock(&kvm_lock);
2654         sca_offset += 16;
2655         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2656                 sca_offset = 0;
2657         kvm->arch.sca = (struct bsca_block *)
2658                         ((char *) kvm->arch.sca + sca_offset);
2659         mutex_unlock(&kvm_lock);
2660
2661         sprintf(debug_name, "kvm-%u", current->pid);
2662
2663         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2664         if (!kvm->arch.dbf)
2665                 goto out_err;
2666
2667         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2668         kvm->arch.sie_page2 =
2669              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2670         if (!kvm->arch.sie_page2)
2671                 goto out_err;
2672
2673         kvm->arch.sie_page2->kvm = kvm;
2674         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2675
2676         for (i = 0; i < kvm_s390_fac_size(); i++) {
2677                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2678                                               (kvm_s390_fac_base[i] |
2679                                                kvm_s390_fac_ext[i]);
2680                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2681                                               kvm_s390_fac_base[i];
2682         }
2683         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2684
2685         /* we are always in czam mode - even on pre-z14 machines */
2686         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2687         set_kvm_facility(kvm->arch.model.fac_list, 138);
2688         /* we emulate STHYI in kvm */
2689         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2690         set_kvm_facility(kvm->arch.model.fac_list, 74);
2691         if (MACHINE_HAS_TLB_GUEST) {
2692                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2693                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2694         }
2695
2696         if (css_general_characteristics.aiv && test_facility(65))
2697                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2698
2699         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2700         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2701
2702         kvm_s390_crypto_init(kvm);
2703
2704         mutex_init(&kvm->arch.float_int.ais_lock);
2705         spin_lock_init(&kvm->arch.float_int.lock);
2706         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2707                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2708         init_waitqueue_head(&kvm->arch.ipte_wq);
2709         mutex_init(&kvm->arch.ipte_mutex);
2710
2711         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2712         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2713
2714         if (type & KVM_VM_S390_UCONTROL) {
2715                 kvm->arch.gmap = NULL;
2716                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2717         } else {
2718                 if (sclp.hamax == U64_MAX)
2719                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2720                 else
2721                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2722                                                     sclp.hamax + 1);
2723                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2724                 if (!kvm->arch.gmap)
2725                         goto out_err;
2726                 kvm->arch.gmap->private = kvm;
2727                 kvm->arch.gmap->pfault_enabled = 0;
2728         }
2729
2730         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2731         kvm->arch.use_skf = sclp.has_skey;
2732         spin_lock_init(&kvm->arch.start_stop_lock);
2733         kvm_s390_vsie_init(kvm);
2734         if (use_gisa)
2735                 kvm_s390_gisa_init(kvm);
2736         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2737
2738         return 0;
2739 out_err:
2740         free_page((unsigned long)kvm->arch.sie_page2);
2741         debug_unregister(kvm->arch.dbf);
2742         sca_dispose(kvm);
2743         KVM_EVENT(3, "creation of vm failed: %d", rc);
2744         return rc;
2745 }
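/*
 * Editor's note -- an illustrative sketch, not part of this file:
 * kvm_arch_init_vm() above runs in response to the KVM_CREATE_VM ioctl.
 * A minimal user-space caller could look like the code below (the
 * function and variable names are assumptions; the ioctls are the
 * standard KVM ABI from <linux/kvm.h>).
 */
#if 0
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_create_s390_vm(void)
{
	int kvm_fd, vm_fd;

	kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);
	if (kvm_fd < 0)
		return -1;
	/* type 0 requests a regular VM; KVM_VM_S390_UCONTROL needs CAP_SYS_ADMIN */
	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0UL);
	return vm_fd;
}
#endif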
2746
2747 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2748 {
2749         u16 rc, rrc;
2750
2751         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2752         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2753         kvm_s390_clear_local_irqs(vcpu);
2754         kvm_clear_async_pf_completion_queue(vcpu);
2755         if (!kvm_is_ucontrol(vcpu->kvm))
2756                 sca_del_vcpu(vcpu);
2757
2758         if (kvm_is_ucontrol(vcpu->kvm))
2759                 gmap_remove(vcpu->arch.gmap);
2760
2761         if (vcpu->kvm->arch.use_cmma)
2762                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2763         /* We cannot hold the vcpu mutex here; we are already dying */
2764         if (kvm_s390_pv_cpu_get_handle(vcpu))
2765                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2766         free_page((unsigned long)(vcpu->arch.sie_block));
2767 }
2768
2769 static void kvm_free_vcpus(struct kvm *kvm)
2770 {
2771         unsigned int i;
2772         struct kvm_vcpu *vcpu;
2773
2774         kvm_for_each_vcpu(i, vcpu, kvm)
2775                 kvm_vcpu_destroy(vcpu);
2776
2777         mutex_lock(&kvm->lock);
2778         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2779                 kvm->vcpus[i] = NULL;
2780
2781         atomic_set(&kvm->online_vcpus, 0);
2782         mutex_unlock(&kvm->lock);
2783 }
2784
2785 void kvm_arch_destroy_vm(struct kvm *kvm)
2786 {
2787         u16 rc, rrc;
2788
2789         kvm_free_vcpus(kvm);
2790         sca_dispose(kvm);
2791         kvm_s390_gisa_destroy(kvm);
2792         /*
2793          * We are already at the end of life and kvm->lock is not taken.
2794          * This is ok as the file descriptor is closed by now and nobody
2795          * can mess with the pv state. To avoid lockdep_assert_held from
2796          * complaining we do not use kvm_s390_pv_is_protected.
2797          */
2798         if (kvm_s390_pv_get_handle(kvm))
2799                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2800         debug_unregister(kvm->arch.dbf);
2801         free_page((unsigned long)kvm->arch.sie_page2);
2802         if (!kvm_is_ucontrol(kvm))
2803                 gmap_remove(kvm->arch.gmap);
2804         kvm_s390_destroy_adapters(kvm);
2805         kvm_s390_clear_float_irqs(kvm);
2806         kvm_s390_vsie_destroy(kvm);
2807         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2808 }
2809
2810 /* Section: vcpu related */
2811 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2812 {
2813         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2814         if (!vcpu->arch.gmap)
2815                 return -ENOMEM;
2816         vcpu->arch.gmap->private = vcpu->kvm;
2817
2818         return 0;
2819 }
2820
2821 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2822 {
2823         if (!kvm_s390_use_sca_entries())
2824                 return;
2825         read_lock(&vcpu->kvm->arch.sca_lock);
2826         if (vcpu->kvm->arch.use_esca) {
2827                 struct esca_block *sca = vcpu->kvm->arch.sca;
2828
2829                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2830                 sca->cpu[vcpu->vcpu_id].sda = 0;
2831         } else {
2832                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2833
2834                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2835                 sca->cpu[vcpu->vcpu_id].sda = 0;
2836         }
2837         read_unlock(&vcpu->kvm->arch.sca_lock);
2838 }
2839
2840 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2841 {
2842         if (!kvm_s390_use_sca_entries()) {
2843                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2844
2845                 /* we still need the basic sca for the ipte control */
2846                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2847                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2848                 return;
2849         }
2850         read_lock(&vcpu->kvm->arch.sca_lock);
2851         if (vcpu->kvm->arch.use_esca) {
2852                 struct esca_block *sca = vcpu->kvm->arch.sca;
2853
2854                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2855                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2857                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2858                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2859         } else {
2860                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2861
2862                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2863                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2864                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2865                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2866         }
2867         read_unlock(&vcpu->kvm->arch.sca_lock);
2868 }
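/*
 * Editor's note -- an illustrative example for the SCA handling above,
 * not part of this file: the SIE block keeps the SCA origin split into
 * two 32-bit halves.  For a (made-up) SCA at 0x0000000012345000:
 *
 *	scaoh = 0x00000000	(bits 63..32 of the origin)
 *	scaol = 0x12345000	(bits 31..0; the low 6 bits are masked
 *				 off for the 64-byte aligned ESCA)
 */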
2869
2870 /* Basic SCA to Extended SCA data copy routines */
2871 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2872 {
2873         d->sda = s->sda;
2874         d->sigp_ctrl.c = s->sigp_ctrl.c;
2875         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2876 }
2877
2878 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2879 {
2880         int i;
2881
2882         d->ipte_control = s->ipte_control;
2883         d->mcn[0] = s->mcn;
2884         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2885                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2886 }
2887
2888 static int sca_switch_to_extended(struct kvm *kvm)
2889 {
2890         struct bsca_block *old_sca = kvm->arch.sca;
2891         struct esca_block *new_sca;
2892         struct kvm_vcpu *vcpu;
2893         unsigned int vcpu_idx;
2894         u32 scaol, scaoh;
2895
2896         if (kvm->arch.use_esca)
2897                 return 0;
2898
2899         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2900         if (!new_sca)
2901                 return -ENOMEM;
2902
2903         scaoh = (u32)((u64)(new_sca) >> 32);
2904         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2905
2906         kvm_s390_vcpu_block_all(kvm);
2907         write_lock(&kvm->arch.sca_lock);
2908
2909         sca_copy_b_to_e(new_sca, old_sca);
2910
2911         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2912                 vcpu->arch.sie_block->scaoh = scaoh;
2913                 vcpu->arch.sie_block->scaol = scaol;
2914                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2915         }
2916         kvm->arch.sca = new_sca;
2917         kvm->arch.use_esca = 1;
2918
2919         write_unlock(&kvm->arch.sca_lock);
2920         kvm_s390_vcpu_unblock_all(kvm);
2921
2922         free_page((unsigned long)old_sca);
2923
2924         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2925                  old_sca, kvm->arch.sca);
2926         return 0;
2927 }
2928
2929 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2930 {
2931         int rc;
2932
2933         if (!kvm_s390_use_sca_entries()) {
2934                 if (id < KVM_MAX_VCPUS)
2935                         return true;
2936                 return false;
2937         }
2938         if (id < KVM_S390_BSCA_CPU_SLOTS)
2939                 return true;
2940         if (!sclp.has_esca || !sclp.has_64bscao)
2941                 return false;
2942
2943         mutex_lock(&kvm->lock);
2944         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2945         mutex_unlock(&kvm->lock);
2946
2947         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2948 }
2949
2950 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2951 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2952 {
2953         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2954         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2955         vcpu->arch.cputm_start = get_tod_clock_fast();
2956         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2957 }
2958
2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2960 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2961 {
2962         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2963         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2964         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2965         vcpu->arch.cputm_start = 0;
2966         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2967 }
2968
2969 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2970 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2971 {
2972         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2973         vcpu->arch.cputm_enabled = true;
2974         __start_cpu_timer_accounting(vcpu);
2975 }
2976
2977 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2978 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2979 {
2980         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2981         __stop_cpu_timer_accounting(vcpu);
2982         vcpu->arch.cputm_enabled = false;
2983 }
2984
2985 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2986 {
2987         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2988         __enable_cpu_timer_accounting(vcpu);
2989         preempt_enable();
2990 }
2991
2992 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2993 {
2994         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2995         __disable_cpu_timer_accounting(vcpu);
2996         preempt_enable();
2997 }
2998
2999 /* set the cpu timer - may only be called from the VCPU thread itself */
3000 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3001 {
3002         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3003         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3004         if (vcpu->arch.cputm_enabled)
3005                 vcpu->arch.cputm_start = get_tod_clock_fast();
3006         vcpu->arch.sie_block->cputm = cputm;
3007         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3008         preempt_enable();
3009 }
3010
3011 /* update and get the cpu timer - can also be called from other VCPU threads */
3012 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3013 {
3014         unsigned int seq;
3015         __u64 value;
3016
3017         if (unlikely(!vcpu->arch.cputm_enabled))
3018                 return vcpu->arch.sie_block->cputm;
3019
3020         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3021         do {
3022                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3023                 /*
3024                  * If the writer would ever execute a read in the critical
3025                  * section, e.g. in irq context, we have a deadlock.
3026                  */
3027                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3028                 value = vcpu->arch.sie_block->cputm;
3029                 /* if cputm_start is 0, accounting is being started/stopped */
3030                 if (likely(vcpu->arch.cputm_start))
3031                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3032         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3033         preempt_enable();
3034         return value;
3035 }
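/*
 * Editor's note -- an illustrative sketch, not part of this file: the
 * CPU timer accounting above publishes (cputm, cputm_start) under a
 * seqcount so that other VCPU threads can read a consistent pair
 * without locking.  Stripped of the KVM specifics, the pattern looks
 * like this (struct and function names are made up; the seqcount
 * primitives are the ones used above, from <linux/seqlock.h>).
 */
#if 0
struct example_timer {
	seqcount_t seqcount;
	u64 base;		/* remaining timer value */
	u64 start;		/* TOD at which accounting started, or 0 */
};

static u64 example_read_timer(struct example_timer *t)
{
	unsigned int seq;
	u64 value;

	do {
		seq = read_seqcount_begin(&t->seqcount); /* waits until even */
		value = t->base;
		if (t->start)	/* accounting currently running */
			value -= get_tod_clock_fast() - t->start;
	} while (read_seqcount_retry(&t->seqcount, seq));
	return value;
}
#endif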
3036
3037 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3038 {
3039
3040         gmap_enable(vcpu->arch.enabled_gmap);
3041         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3042         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3043                 __start_cpu_timer_accounting(vcpu);
3044         vcpu->cpu = cpu;
3045 }
3046
3047 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3048 {
3049         vcpu->cpu = -1;
3050         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3051                 __stop_cpu_timer_accounting(vcpu);
3052         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3053         vcpu->arch.enabled_gmap = gmap_get_enabled();
3054         gmap_disable(vcpu->arch.enabled_gmap);
3055
3056 }
3057
3058 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3059 {
3060         mutex_lock(&vcpu->kvm->lock);
3061         preempt_disable();
3062         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3063         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3064         preempt_enable();
3065         mutex_unlock(&vcpu->kvm->lock);
3066         if (!kvm_is_ucontrol(vcpu->kvm)) {
3067                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3068                 sca_add_vcpu(vcpu);
3069         }
3070         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3071                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3072         /* make vcpu_load load the right gmap on the first trigger */
3073         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3074 }
3075
3076 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3077 {
3078         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3079             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3080                 return true;
3081         return false;
3082 }
3083
3084 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3085 {
3086         /* At least one ECC subfunction must be present */
3087         return kvm_has_pckmo_subfunc(kvm, 32) ||
3088                kvm_has_pckmo_subfunc(kvm, 33) ||
3089                kvm_has_pckmo_subfunc(kvm, 34) ||
3090                kvm_has_pckmo_subfunc(kvm, 40) ||
3091                kvm_has_pckmo_subfunc(kvm, 41);
3092
3093 }
3094
3095 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3096 {
3097         /*
3098          * If the AP instructions are not being interpreted and the MSAX3
3099          * facility is not configured for the guest, there is nothing to set up.
3100          */
3101         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3102                 return;
3103
3104         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3105         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3106         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3107         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3108
3109         if (vcpu->kvm->arch.crypto.apie)
3110                 vcpu->arch.sie_block->eca |= ECA_APIE;
3111
3112         /* Set up protected key support */
3113         if (vcpu->kvm->arch.crypto.aes_kw) {
3114                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3115                 /* ecc is also wrapped with AES key */
3116                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3117                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3118         }
3119
3120         if (vcpu->kvm->arch.crypto.dea_kw)
3121                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3122 }
3123
3124 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3125 {
3126         free_page(vcpu->arch.sie_block->cbrlo);
3127         vcpu->arch.sie_block->cbrlo = 0;
3128 }
3129
3130 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3131 {
3132         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3133         if (!vcpu->arch.sie_block->cbrlo)
3134                 return -ENOMEM;
3135         return 0;
3136 }
3137
3138 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3139 {
3140         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3141
3142         vcpu->arch.sie_block->ibc = model->ibc;
3143         if (test_kvm_facility(vcpu->kvm, 7))
3144                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3145 }
3146
3147 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3148 {
3149         int rc = 0;
3150         u16 uvrc, uvrrc;
3151
3152         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3153                                                     CPUSTAT_SM |
3154                                                     CPUSTAT_STOPPED);
3155
3156         if (test_kvm_facility(vcpu->kvm, 78))
3157                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3158         else if (test_kvm_facility(vcpu->kvm, 8))
3159                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3160
3161         kvm_s390_vcpu_setup_model(vcpu);
3162
3163         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3164         if (MACHINE_HAS_ESOP)
3165                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3166         if (test_kvm_facility(vcpu->kvm, 9))
3167                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3168         if (test_kvm_facility(vcpu->kvm, 73))
3169                 vcpu->arch.sie_block->ecb |= ECB_TE;
3170
3171         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3172                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3173         if (test_kvm_facility(vcpu->kvm, 130))
3174                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3175         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3176         if (sclp.has_cei)
3177                 vcpu->arch.sie_block->eca |= ECA_CEI;
3178         if (sclp.has_ib)
3179                 vcpu->arch.sie_block->eca |= ECA_IB;
3180         if (sclp.has_siif)
3181                 vcpu->arch.sie_block->eca |= ECA_SII;
3182         if (sclp.has_sigpif)
3183                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3184         if (test_kvm_facility(vcpu->kvm, 129)) {
3185                 vcpu->arch.sie_block->eca |= ECA_VX;
3186                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3187         }
3188         if (test_kvm_facility(vcpu->kvm, 139))
3189                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3190         if (test_kvm_facility(vcpu->kvm, 156))
3191                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3192         if (vcpu->arch.sie_block->gd) {
3193                 vcpu->arch.sie_block->eca |= ECA_AIV;
3194                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3195                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3196         }
3197         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3198                                         | SDNXC;
3199         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3200
3201         if (sclp.has_kss)
3202                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3203         else
3204                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3205
3206         if (vcpu->kvm->arch.use_cmma) {
3207                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3208                 if (rc)
3209                         return rc;
3210         }
3211         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3212         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3213
3214         vcpu->arch.sie_block->hpid = HPID_KVM;
3215
3216         kvm_s390_vcpu_crypto_setup(vcpu);
3217
3218         mutex_lock(&vcpu->kvm->lock);
3219         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3220                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3221                 if (rc)
3222                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3223         }
3224         mutex_unlock(&vcpu->kvm->lock);
3225
3226         return rc;
3227 }
3228
3229 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3230 {
3231         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3232                 return -EINVAL;
3233         return 0;
3234 }
3235
3236 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3237 {
3238         struct sie_page *sie_page;
3239         int rc;
3240
3241         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3242         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3243         if (!sie_page)
3244                 return -ENOMEM;
3245
3246         vcpu->arch.sie_block = &sie_page->sie_block;
3247         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3248
3249         /* the real guest size will always be smaller than msl */
3250         vcpu->arch.sie_block->mso = 0;
3251         vcpu->arch.sie_block->msl = sclp.hamax;
3252
3253         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3254         spin_lock_init(&vcpu->arch.local_int.lock);
3255         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3256         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3257                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3258         seqcount_init(&vcpu->arch.cputm_seqcount);
3259
3260         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3261         kvm_clear_async_pf_completion_queue(vcpu);
3262         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3263                                     KVM_SYNC_GPRS |
3264                                     KVM_SYNC_ACRS |
3265                                     KVM_SYNC_CRS |
3266                                     KVM_SYNC_ARCH0 |
3267                                     KVM_SYNC_PFAULT;
3268         kvm_s390_set_prefix(vcpu, 0);
3269         if (test_kvm_facility(vcpu->kvm, 64))
3270                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3271         if (test_kvm_facility(vcpu->kvm, 82))
3272                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3273         if (test_kvm_facility(vcpu->kvm, 133))
3274                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3275         if (test_kvm_facility(vcpu->kvm, 156))
3276                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3277         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3278          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3279          */
3280         if (MACHINE_HAS_VX)
3281                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3282         else
3283                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3284
3285         if (kvm_is_ucontrol(vcpu->kvm)) {
3286                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3287                 if (rc)
3288                         goto out_free_sie_block;
3289         }
3290
3291         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3292                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3293         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3294
3295         rc = kvm_s390_vcpu_setup(vcpu);
3296         if (rc)
3297                 goto out_ucontrol_uninit;
3298         return 0;
3299
3300 out_ucontrol_uninit:
3301         if (kvm_is_ucontrol(vcpu->kvm))
3302                 gmap_remove(vcpu->arch.gmap);
3303 out_free_sie_block:
3304         free_page((unsigned long)(vcpu->arch.sie_block));
3305         return rc;
3306 }
3307
3308 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3309 {
3310         return kvm_s390_vcpu_has_irq(vcpu, 0);
3311 }
3312
3313 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3314 {
3315         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3316 }
3317
3318 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3319 {
3320         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3321         exit_sie(vcpu);
3322 }
3323
3324 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3325 {
3326         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3327 }
3328
3329 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3330 {
3331         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3332         exit_sie(vcpu);
3333 }
3334
3335 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3336 {
3337         return atomic_read(&vcpu->arch.sie_block->prog20) &
3338                (PROG_BLOCK_SIE | PROG_REQUEST);
3339 }
3340
3341 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3342 {
3343         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3344 }
3345
3346 /*
3347  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3348  * If the CPU is not running (e.g. waiting as idle) the function will
3349  * return immediately. */
3350 void exit_sie(struct kvm_vcpu *vcpu)
3351 {
3352         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3353         kvm_s390_vsie_kick(vcpu);
3354         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3355                 cpu_relax();
3356 }
3357
3358 /* Kick a guest cpu out of SIE to process a request synchronously */
3359 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3360 {
3361         kvm_make_request(req, vcpu);
3362         kvm_s390_vcpu_request(vcpu);
3363 }
3364
3365 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3366                               unsigned long end)
3367 {
3368         struct kvm *kvm = gmap->private;
3369         struct kvm_vcpu *vcpu;
3370         unsigned long prefix;
3371         int i;
3372
3373         if (gmap_is_shadow(gmap))
3374                 return;
3375         if (start >= 1UL << 31)
3376                 /* We are only interested in prefix pages */
3377                 return;
3378         kvm_for_each_vcpu(i, vcpu, kvm) {
3379                 /* match against both prefix pages */
3380                 prefix = kvm_s390_get_prefix(vcpu);
3381                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3382                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3383                                    start, end);
3384                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3385                 }
3386         }
3387 }
3388
3389 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3390 {
3391         /* do not poll with more than halt_poll_max_steal percent of steal time */
3392         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3393             halt_poll_max_steal) {
3394                 vcpu->stat.halt_no_poll_steal++;
3395                 return true;
3396         }
3397         return false;
3398 }
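/*
 * Editor's note -- illustrative arithmetic for the check above, not
 * part of this file: avg_steal_timer is kept in CPU-timer units, where
 * one microsecond corresponds to 1 << 12 units, so dividing by
 * (TICK_USEC << 12) gives the stolen fraction of a tick and the "* 100"
 * turns it into a percentage.  Assuming the default halt_poll_max_steal
 * of 10 and TICK_USEC of 10000, polling is skipped once roughly 1 ms of
 * every 10 ms tick has been stolen by the hypervisor.
 */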
3399
3400 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3401 {
3402         /* kvm common code refers to this, but never calls it */
3403         BUG();
3404         return 0;
3405 }
3406
3407 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3408                                            struct kvm_one_reg *reg)
3409 {
3410         int r = -EINVAL;
3411
3412         switch (reg->id) {
3413         case KVM_REG_S390_TODPR:
3414                 r = put_user(vcpu->arch.sie_block->todpr,
3415                              (u32 __user *)reg->addr);
3416                 break;
3417         case KVM_REG_S390_EPOCHDIFF:
3418                 r = put_user(vcpu->arch.sie_block->epoch,
3419                              (u64 __user *)reg->addr);
3420                 break;
3421         case KVM_REG_S390_CPU_TIMER:
3422                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3423                              (u64 __user *)reg->addr);
3424                 break;
3425         case KVM_REG_S390_CLOCK_COMP:
3426                 r = put_user(vcpu->arch.sie_block->ckc,
3427                              (u64 __user *)reg->addr);
3428                 break;
3429         case KVM_REG_S390_PFTOKEN:
3430                 r = put_user(vcpu->arch.pfault_token,
3431                              (u64 __user *)reg->addr);
3432                 break;
3433         case KVM_REG_S390_PFCOMPARE:
3434                 r = put_user(vcpu->arch.pfault_compare,
3435                              (u64 __user *)reg->addr);
3436                 break;
3437         case KVM_REG_S390_PFSELECT:
3438                 r = put_user(vcpu->arch.pfault_select,
3439                              (u64 __user *)reg->addr);
3440                 break;
3441         case KVM_REG_S390_PP:
3442                 r = put_user(vcpu->arch.sie_block->pp,
3443                              (u64 __user *)reg->addr);
3444                 break;
3445         case KVM_REG_S390_GBEA:
3446                 r = put_user(vcpu->arch.sie_block->gbea,
3447                              (u64 __user *)reg->addr);
3448                 break;
3449         default:
3450                 break;
3451         }
3452
3453         return r;
3454 }
3455
3456 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3457                                            struct kvm_one_reg *reg)
3458 {
3459         int r = -EINVAL;
3460         __u64 val;
3461
3462         switch (reg->id) {
3463         case KVM_REG_S390_TODPR:
3464                 r = get_user(vcpu->arch.sie_block->todpr,
3465                              (u32 __user *)reg->addr);
3466                 break;
3467         case KVM_REG_S390_EPOCHDIFF:
3468                 r = get_user(vcpu->arch.sie_block->epoch,
3469                              (u64 __user *)reg->addr);
3470                 break;
3471         case KVM_REG_S390_CPU_TIMER:
3472                 r = get_user(val, (u64 __user *)reg->addr);
3473                 if (!r)
3474                         kvm_s390_set_cpu_timer(vcpu, val);
3475                 break;
3476         case KVM_REG_S390_CLOCK_COMP:
3477                 r = get_user(vcpu->arch.sie_block->ckc,
3478                              (u64 __user *)reg->addr);
3479                 break;
3480         case KVM_REG_S390_PFTOKEN:
3481                 r = get_user(vcpu->arch.pfault_token,
3482                              (u64 __user *)reg->addr);
3483                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3484                         kvm_clear_async_pf_completion_queue(vcpu);
3485                 break;
3486         case KVM_REG_S390_PFCOMPARE:
3487                 r = get_user(vcpu->arch.pfault_compare,
3488                              (u64 __user *)reg->addr);
3489                 break;
3490         case KVM_REG_S390_PFSELECT:
3491                 r = get_user(vcpu->arch.pfault_select,
3492                              (u64 __user *)reg->addr);
3493                 break;
3494         case KVM_REG_S390_PP:
3495                 r = get_user(vcpu->arch.sie_block->pp,
3496                              (u64 __user *)reg->addr);
3497                 break;
3498         case KVM_REG_S390_GBEA:
3499                 r = get_user(vcpu->arch.sie_block->gbea,
3500                              (u64 __user *)reg->addr);
3501                 break;
3502         default:
3503                 break;
3504         }
3505
3506         return r;
3507 }
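/*
 * Editor's note -- an illustrative sketch, not part of this file: the
 * two helpers above back the generic KVM_GET_ONE_REG / KVM_SET_ONE_REG
 * vcpu ioctls.  From user space, a register is addressed by its id plus
 * a pointer to a buffer (function and variable names are assumptions;
 * struct kvm_one_reg and the register ids come from the KVM uapi
 * headers).
 */
#if 0
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_get_cpu_timer(int vcpu_fd, __u64 *cputm)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_CPU_TIMER,
		.addr = (__u64)(unsigned long)cputm,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}

static int example_set_cpu_timer(int vcpu_fd, __u64 cputm)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_S390_CPU_TIMER,
		.addr = (__u64)(unsigned long)&cputm,
	};

	return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
}
#endif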
3508
3509 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3510 {
3511         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3512         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3513         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3514
3515         kvm_clear_async_pf_completion_queue(vcpu);
3516         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3517                 kvm_s390_vcpu_stop(vcpu);
3518         kvm_s390_clear_local_irqs(vcpu);
3519 }
3520
3521 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3522 {
3523         /* Initial reset is a superset of the normal reset */
3524         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3525
3526         /*
3527          * This equals an initial CPU reset in the PoP, but we don't switch to ESA.
3528          * We reset not only the internal data, but also ...
3529          */
3530         vcpu->arch.sie_block->gpsw.mask = 0;
3531         vcpu->arch.sie_block->gpsw.addr = 0;
3532         kvm_s390_set_prefix(vcpu, 0);
3533         kvm_s390_set_cpu_timer(vcpu, 0);
3534         vcpu->arch.sie_block->ckc = 0;
3535         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3536         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3537         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3538
3539         /* ... the data in sync regs */
3540         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3541         vcpu->run->s.regs.ckc = 0;
3542         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3543         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3544         vcpu->run->psw_addr = 0;
3545         vcpu->run->psw_mask = 0;
3546         vcpu->run->s.regs.todpr = 0;
3547         vcpu->run->s.regs.cputm = 0;
3548         vcpu->run->s.regs.ckc = 0;
3549         vcpu->run->s.regs.pp = 0;
3550         vcpu->run->s.regs.gbea = 1;
3551         vcpu->run->s.regs.fpc = 0;
3552         /*
3553          * Do not reset these registers in the protected case, as some of
3554          * them are overlaid and they are not accessible in this case
3555          * anyway.
3556          */
3557         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3558                 vcpu->arch.sie_block->gbea = 1;
3559                 vcpu->arch.sie_block->pp = 0;
3560                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3561                 vcpu->arch.sie_block->todpr = 0;
3562         }
3563 }
3564
3565 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3566 {
3567         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3568
3569         /* Clear reset is a superset of the initial reset */
3570         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3571
3572         memset(&regs->gprs, 0, sizeof(regs->gprs));
3573         memset(&regs->vrs, 0, sizeof(regs->vrs));
3574         memset(&regs->acrs, 0, sizeof(regs->acrs));
3575         memset(&regs->gscb, 0, sizeof(regs->gscb));
3576
3577         regs->etoken = 0;
3578         regs->etoken_extension = 0;
3579 }
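/*
 * Editor's note -- a summary of the reset hierarchy implemented above,
 * not part of this file: normal reset < initial reset < clear reset,
 * each one a superset of the previous.  User space reaches these
 * through the KVM_S390_NORMAL_RESET, KVM_S390_INITIAL_RESET and
 * KVM_S390_CLEAR_RESET vcpu ioctls (the normal and clear variants are
 * advertised via the KVM_CAP_S390_VCPU_RESETS capability), e.g.
 *
 *	ioctl(vcpu_fd, KVM_S390_INITIAL_RESET, NULL);
 *
 * with vcpu_fd being an open vcpu file descriptor (an assumption here).
 */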
3580
3581 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3582 {
3583         vcpu_load(vcpu);
3584         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3585         vcpu_put(vcpu);
3586         return 0;
3587 }
3588
3589 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3590 {
3591         vcpu_load(vcpu);
3592         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3593         vcpu_put(vcpu);
3594         return 0;
3595 }
3596
3597 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3598                                   struct kvm_sregs *sregs)
3599 {
3600         vcpu_load(vcpu);
3601
3602         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3603         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3604
3605         vcpu_put(vcpu);
3606         return 0;
3607 }
3608
3609 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3610                                   struct kvm_sregs *sregs)
3611 {
3612         vcpu_load(vcpu);
3613
3614         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3615         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3616
3617         vcpu_put(vcpu);
3618         return 0;
3619 }
3620
3621 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3622 {
3623         int ret = 0;
3624
3625         vcpu_load(vcpu);
3626
3627         if (test_fp_ctl(fpu->fpc)) {
3628                 ret = -EINVAL;
3629                 goto out;
3630         }
3631         vcpu->run->s.regs.fpc = fpu->fpc;
3632         if (MACHINE_HAS_VX)
3633                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3634                                  (freg_t *) fpu->fprs);
3635         else
3636                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3637
3638 out:
3639         vcpu_put(vcpu);
3640         return ret;
3641 }
3642
3643 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3644 {
3645         vcpu_load(vcpu);
3646
3647         /* make sure we have the latest values */
3648         save_fpu_regs();
3649         if (MACHINE_HAS_VX)
3650                 convert_vx_to_fp((freg_t *) fpu->fprs,
3651                                  (__vector128 *) vcpu->run->s.regs.vrs);
3652         else
3653                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3654         fpu->fpc = vcpu->run->s.regs.fpc;
3655
3656         vcpu_put(vcpu);
3657         return 0;
3658 }
3659
3660 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3661 {
3662         int rc = 0;
3663
3664         if (!is_vcpu_stopped(vcpu))
3665                 rc = -EBUSY;
3666         else {
3667                 vcpu->run->psw_mask = psw.mask;
3668                 vcpu->run->psw_addr = psw.addr;
3669         }
3670         return rc;
3671 }
3672
3673 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3674                                   struct kvm_translation *tr)
3675 {
3676         return -EINVAL; /* not implemented yet */
3677 }
3678
3679 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3680                               KVM_GUESTDBG_USE_HW_BP | \
3681                               KVM_GUESTDBG_ENABLE)
3682
3683 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3684                                         struct kvm_guest_debug *dbg)
3685 {
3686         int rc = 0;
3687
3688         vcpu_load(vcpu);
3689
3690         vcpu->guest_debug = 0;
3691         kvm_s390_clear_bp_data(vcpu);
3692
3693         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3694                 rc = -EINVAL;
3695                 goto out;
3696         }
3697         if (!sclp.has_gpere) {
3698                 rc = -EINVAL;
3699                 goto out;
3700         }
3701
3702         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3703                 vcpu->guest_debug = dbg->control;
3704                 /* enforce guest PER */
3705                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3706
3707                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3708                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3709         } else {
3710                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3711                 vcpu->arch.guestdbg.last_bp = 0;
3712         }
3713
3714         if (rc) {
3715                 vcpu->guest_debug = 0;
3716                 kvm_s390_clear_bp_data(vcpu);
3717                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3718         }
3719
3720 out:
3721         vcpu_put(vcpu);
3722         return rc;
3723 }
3724
3725 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3726                                     struct kvm_mp_state *mp_state)
3727 {
3728         int ret;
3729
3730         vcpu_load(vcpu);
3731
3732         /* CHECK_STOP and LOAD are not supported yet */
3733         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3734                                       KVM_MP_STATE_OPERATING;
3735
3736         vcpu_put(vcpu);
3737         return ret;
3738 }
3739
3740 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3741                                     struct kvm_mp_state *mp_state)
3742 {
3743         int rc = 0;
3744
3745         vcpu_load(vcpu);
3746
3747         /* user space knows about this interface - let it control the state */
3748         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3749
3750         switch (mp_state->mp_state) {
3751         case KVM_MP_STATE_STOPPED:
3752                 rc = kvm_s390_vcpu_stop(vcpu);
3753                 break;
3754         case KVM_MP_STATE_OPERATING:
3755                 rc = kvm_s390_vcpu_start(vcpu);
3756                 break;
3757         case KVM_MP_STATE_LOAD:
3758                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3759                         rc = -ENXIO;
3760                         break;
3761                 }
3762                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3763                 break;
3764         case KVM_MP_STATE_CHECK_STOP:
3765                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3766         default:
3767                 rc = -ENXIO;
3768         }
3769
3770         vcpu_put(vcpu);
3771         return rc;
3772 }
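/*
 * Editor's note -- an illustrative sketch, not part of this file: user
 * space drives the stopped/operating transitions above through the
 * KVM_GET_MP_STATE / KVM_SET_MP_STATE vcpu ioctls (function and
 * variable names are assumptions; struct kvm_mp_state and the state
 * constants come from <linux/kvm.h>).
 */
#if 0
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_start_vcpu(int vcpu_fd)
{
	struct kvm_mp_state state = {
		.mp_state = KVM_MP_STATE_OPERATING,
	};

	/* this also flags the VM as having user-controlled cpu state */
	return ioctl(vcpu_fd, KVM_SET_MP_STATE, &state);
}
#endif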
3773
3774 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3775 {
3776         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3777 }
3778
3779 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3780 {
3781 retry:
3782         kvm_s390_vcpu_request_handled(vcpu);
3783         if (!kvm_request_pending(vcpu))
3784                 return 0;
3785         /*
3786          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3787          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3788          * This ensures that the ipte instruction for this request has
3789          * already finished. We might race against a second unmapper that
3790          * wants to set the blocking bit. Let's just retry the request loop.
3791          */
3792         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3793                 int rc;
3794                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3795                                           kvm_s390_get_prefix(vcpu),
3796                                           PAGE_SIZE * 2, PROT_WRITE);
3797                 if (rc) {
3798                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3799                         return rc;
3800                 }
3801                 goto retry;
3802         }
3803
3804         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3805                 vcpu->arch.sie_block->ihcpu = 0xffff;
3806                 goto retry;
3807         }
3808
3809         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3810                 if (!ibs_enabled(vcpu)) {
3811                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3812                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3813                 }
3814                 goto retry;
3815         }
3816
3817         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3818                 if (ibs_enabled(vcpu)) {
3819                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3820                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3821                 }
3822                 goto retry;
3823         }
3824
3825         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3826                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3827                 goto retry;
3828         }
3829
3830         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3831                 /*
3832                  * Disable CMM virtualization; we will emulate the ESSA
3833                  * instruction manually, in order to provide additional
3834                  * functionalities needed for live migration.
3835                  */
3836                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3837                 goto retry;
3838         }
3839
3840         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3841                 /*
3842                  * Re-enable CMM virtualization if CMMA is available and
3843                  * CMM has been used.
3844                  */
3845                 if ((vcpu->kvm->arch.use_cmma) &&
3846                     (vcpu->kvm->mm->context.uses_cmm))
3847                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3848                 goto retry;
3849         }
3850
3851         /* nothing to do, just clear the request */
3852         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3853         /* we left the vsie handler, nothing to do, just clear the request */
3854         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3855
3856         return 0;
3857 }
3858
3859 void kvm_s390_set_tod_clock(struct kvm *kvm,
3860                             const struct kvm_s390_vm_tod_clock *gtod)
3861 {
3862         struct kvm_vcpu *vcpu;
3863         struct kvm_s390_tod_clock_ext htod;
3864         int i;
3865
3866         mutex_lock(&kvm->lock);
3867         preempt_disable();
3868
3869         get_tod_clock_ext((char *)&htod);
3870
3871         kvm->arch.epoch = gtod->tod - htod.tod;
3872         kvm->arch.epdx = 0;
3873         if (test_kvm_facility(kvm, 139)) {
3874                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3875                 if (kvm->arch.epoch > gtod->tod)
3876                         kvm->arch.epdx -= 1;
3877         }
3878
3879         kvm_s390_vcpu_block_all(kvm);
3880         kvm_for_each_vcpu(i, vcpu, kvm) {
3881                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3882                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3883         }
3884
3885         kvm_s390_vcpu_unblock_all(kvm);
3886         preempt_enable();
3887         mutex_unlock(&kvm->lock);
3888 }
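/*
 * Editor's note -- illustrative arithmetic for the function above, not
 * part of this file: the epoch stored in the SIE block is the value the
 * hardware adds to the host TOD clock, i.e.
 *
 *	guest_tod = host_tod + kvm->arch.epoch
 *
 * so storing gtod->tod - htod.tod makes the guest observe gtod->tod at
 * the time of the call.  With the multiple-epoch facility (139), epdx
 * extends that difference beyond 64 bits and is decremented by one when
 * the subtraction above borrows.
 */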
3889
3890 /**
3891  * kvm_arch_fault_in_page - fault-in guest page if necessary
3892  * @vcpu: The corresponding virtual cpu
3893  * @gpa: Guest physical address
3894  * @writable: Whether the page should be writable or not
3895  *
3896  * Make sure that a guest page has been faulted-in on the host.
3897  *
3898  * Return: Zero on success, negative error code otherwise.
3899  */
3900 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3901 {
3902         return gmap_fault(vcpu->arch.gmap, gpa,
3903                           writable ? FAULT_FLAG_WRITE : 0);
3904 }
3905
3906 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3907                                       unsigned long token)
3908 {
3909         struct kvm_s390_interrupt inti;
3910         struct kvm_s390_irq irq;
3911
3912         if (start_token) {
3913                 irq.u.ext.ext_params2 = token;
3914                 irq.type = KVM_S390_INT_PFAULT_INIT;
3915                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3916         } else {
3917                 inti.type = KVM_S390_INT_PFAULT_DONE;
3918                 inti.parm64 = token;
3919                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3920         }
3921 }
3922
3923 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3924                                      struct kvm_async_pf *work)
3925 {
3926         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3927         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3928 }
3929
3930 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3931                                  struct kvm_async_pf *work)
3932 {
3933         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3934         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3935 }
3936
3937 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3938                                struct kvm_async_pf *work)
3939 {
3940         /* s390 will always inject the page directly */
3941 }
3942
3943 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3944 {
3945         /*
3946          * s390 will always inject the page directly,
3947          * but we still want check_async_completion to clean up
3948          */
3949         return true;
3950 }
3951
3952 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3953 {
3954         hva_t hva;
3955         struct kvm_arch_async_pf arch;
3956         int rc;
3957
3958         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3959                 return 0;
3960         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3961             vcpu->arch.pfault_compare)
3962                 return 0;
3963         if (psw_extint_disabled(vcpu))
3964                 return 0;
3965         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3966                 return 0;
3967         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3968                 return 0;
3969         if (!vcpu->arch.gmap->pfault_enabled)
3970                 return 0;
3971
3972         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3973         hva += current->thread.gmap_addr & ~PAGE_MASK;
3974         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3975                 return 0;
3976
3977         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3978         return rc;
3979 }
3980
3981 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3982 {
3983         int rc, cpuflags;
3984
3985         /*
3986          * On s390 notifications for arriving pages will be delivered directly
3987          * to the guest, but the housekeeping for completed pfaults is
3988          * handled outside the worker.
3989          */
3990         kvm_check_async_pf_completion(vcpu);
3991
3992         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3993         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3994
3995         if (need_resched())
3996                 schedule();
3997
3998         if (test_cpu_flag(CIF_MCCK_PENDING))
3999                 s390_handle_mcck();
4000
4001         if (!kvm_is_ucontrol(vcpu->kvm)) {
4002                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4003                 if (rc)
4004                         return rc;
4005         }
4006
4007         rc = kvm_s390_handle_requests(vcpu);
4008         if (rc)
4009                 return rc;
4010
4011         if (guestdbg_enabled(vcpu)) {
4012                 kvm_s390_backup_guest_per_regs(vcpu);
4013                 kvm_s390_patch_guest_per_regs(vcpu);
4014         }
4015
4016         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4017
4018         vcpu->arch.sie_block->icptcode = 0;
4019         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4020         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4021         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4022
4023         return 0;
4024 }
4025
4026 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4027 {
4028         struct kvm_s390_pgm_info pgm_info = {
4029                 .code = PGM_ADDRESSING,
4030         };
4031         u8 opcode, ilen;
4032         int rc;
4033
4034         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4035         trace_kvm_s390_sie_fault(vcpu);
4036
4037         /*
4038          * We want to inject an addressing exception, which is defined as a
4039          * suppressing or terminating exception. However, since we came here
4040          * by a DAT access exception, the PSW still points to the faulting
4041          * instruction since DAT exceptions are nullifying. So we've got
4042          * to look up the current opcode to get the length of the instruction
4043          * to be able to forward the PSW.
4044          */
4045         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4046         ilen = insn_length(opcode);
4047         if (rc < 0) {
4048                 return rc;
4049         } else if (rc) {
4050                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4051                  * Forward by an arbitrary ilc; injection will take care of
4052                  * nullification if necessary.
4053                  */
4054                 pgm_info = vcpu->arch.pgm;
4055                 ilen = 4;
4056         }
4057         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4058         kvm_s390_forward_psw(vcpu, ilen);
4059         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4060 }
4061
4062 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4063 {
4064         struct mcck_volatile_info *mcck_info;
4065         struct sie_page *sie_page;
4066
4067         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4068                    vcpu->arch.sie_block->icptcode);
4069         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4070
4071         if (guestdbg_enabled(vcpu))
4072                 kvm_s390_restore_guest_per_regs(vcpu);
4073
4074         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4075         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4076
4077         if (exit_reason == -EINTR) {
4078                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4079                 sie_page = container_of(vcpu->arch.sie_block,
4080                                         struct sie_page, sie_block);
4081                 mcck_info = &sie_page->mcck_info;
4082                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4083                 return 0;
4084         }
4085
4086         if (vcpu->arch.sie_block->icptcode > 0) {
4087                 int rc = kvm_handle_sie_intercept(vcpu);
4088
4089                 if (rc != -EOPNOTSUPP)
4090                         return rc;
4091                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4092                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4093                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4094                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4095                 return -EREMOTE;
4096         } else if (exit_reason != -EFAULT) {
4097                 vcpu->stat.exit_null++;
4098                 return 0;
4099         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4100                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4101                 vcpu->run->s390_ucontrol.trans_exc_code =
4102                                                 current->thread.gmap_addr;
4103                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4104                 return -EREMOTE;
4105         } else if (current->thread.gmap_pfault) {
4106                 trace_kvm_s390_major_guest_pfault(vcpu);
4107                 current->thread.gmap_pfault = 0;
4108                 if (kvm_arch_setup_async_pf(vcpu))
4109                         return 0;
4110                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4111         }
4112         return vcpu_post_run_fault_in_sie(vcpu);
4113 }
4114
4115 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4116 static int __vcpu_run(struct kvm_vcpu *vcpu)
4117 {
4118         int rc, exit_reason;
4119         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4120
4121         /*
4122          * We try to hold kvm->srcu during most of vcpu_run (except when
4123          * running the guest), so that memslots (and other stuff) are protected
4124          */
4125         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4126
4127         do {
4128                 rc = vcpu_pre_run(vcpu);
4129                 if (rc)
4130                         break;
4131
4132                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4133                 /*
4134                  * As PF_VCPU will be used in the fault handler, there must
4135                  * be no uaccess between guest_enter and guest_exit.
4136                  */
4137                 local_irq_disable();
4138                 guest_enter_irqoff();
4139                 __disable_cpu_timer_accounting(vcpu);
4140                 local_irq_enable();
4141                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4142                         memcpy(sie_page->pv_grregs,
4143                                vcpu->run->s.regs.gprs,
4144                                sizeof(sie_page->pv_grregs));
4145                 }
4146                 exit_reason = sie64a(vcpu->arch.sie_block,
4147                                      vcpu->run->s.regs.gprs);
4148                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4149                         memcpy(vcpu->run->s.regs.gprs,
4150                                sie_page->pv_grregs,
4151                                sizeof(sie_page->pv_grregs));
4152                         /*
4153                          * We're not allowed to inject interrupts on intercepts
4154                          * that leave the guest state in an "in-between" state
4155                          * where the next SIE entry will do a continuation.
4156                          * Fence interrupts in our "internal" PSW.
4157                          */
4158                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4159                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4160                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4161                         }
4162                 }
4163                 local_irq_disable();
4164                 __enable_cpu_timer_accounting(vcpu);
4165                 guest_exit_irqoff();
4166                 local_irq_enable();
4167                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4168
4169                 rc = vcpu_post_run(vcpu, exit_reason);
4170         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4171
4172         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4173         return rc;
4174 }
4175
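/*
 * Sync the register state that only exists for non-protected ("format 2")
 * guests from kvm_run into the SIE control block, including the lazy
 * enablement of runtime instrumentation (RI) and guarded storage (GS) when
 * userspace hands in valid control blocks.
 */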
4176 static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4177 {
4178         struct runtime_instr_cb *riccb;
4179         struct gs_cb *gscb;
4180
4181         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4182         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4183         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4184         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4185         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4186                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4187                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4188                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4189         }
4190         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4191                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4192                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4193                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4194                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4195                         kvm_clear_async_pf_completion_queue(vcpu);
4196         }
4197         /*
4198          * If userspace sets the riccb (e.g. after migration) to a valid state,
4199          * we should enable RI here instead of doing the lazy enablement.
4200          */
4201         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4202             test_kvm_facility(vcpu->kvm, 64) &&
4203             riccb->v &&
4204             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4205                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4206                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4207         }
4208         /*
4209          * If userspace sets the gscb (e.g. after migration) to non-zero,
4210          * we should enable GS here instead of doing the lazy enablement.
4211          */
4212         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4213             test_kvm_facility(vcpu->kvm, 133) &&
4214             gscb->gssm &&
4215             !vcpu->arch.gs_enabled) {
4216                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4217                 vcpu->arch.sie_block->ecb |= ECB_GS;
4218                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4219                 vcpu->arch.gs_enabled = 1;
4220         }
4221         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4222             test_kvm_facility(vcpu->kvm, 82)) {
4223                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4224                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4225         }
4226         if (MACHINE_HAS_GS) {
4227                 preempt_disable();
4228                 __ctl_set_bit(2, 4);
4229                 if (current->thread.gs_cb) {
4230                         vcpu->arch.host_gscb = current->thread.gs_cb;
4231                         save_gs_cb(vcpu->arch.host_gscb);
4232                 }
4233                 if (vcpu->arch.gs_enabled) {
4234                         current->thread.gs_cb = (struct gs_cb *)
4235                                                 &vcpu->run->s.regs.gscb;
4236                         restore_gs_cb(current->thread.gs_cb);
4237                 }
4238                 preempt_enable();
4239         }
4240         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4241 }
4242
4243 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4244 {
4245         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4246                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4247         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4248                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4249                 /* some control register changes require a tlb flush */
4250                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4251         }
4252         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4253                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4254                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4255         }
4256         save_access_regs(vcpu->arch.host_acrs);
4257         restore_access_regs(vcpu->run->s.regs.acrs);
4258         /* save host (userspace) fprs/vrs */
4259         save_fpu_regs();
4260         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4261         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4262         if (MACHINE_HAS_VX)
4263                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4264         else
4265                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4266         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4267         if (test_fp_ctl(current->thread.fpu.fpc))
4268                 /* User space provided an invalid FPC, let's clear it */
4269                 current->thread.fpu.fpc = 0;
4270
4271         /* Sync fmt2 only data */
4272         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4273                 sync_regs_fmt2(vcpu, kvm_run);
4274         } else {
4275                 /*
4276                  * In several places we have to modify our internal view to
4277                  * not do things that are disallowed by the ultravisor. For
4278                  * example we must not inject interrupts after specific exits
4279                  * (e.g. 112 prefix page not secure). We do this by turning
4280                  * off the machine check, external and I/O interrupt bits
4281                  * of our PSW copy. To avoid getting validity intercepts, we
4282                  * only accept the condition code from userspace.
4283                  */
4284                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4285                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4286                                                    PSW_MASK_CC;
4287         }
4288
4289         kvm_run->kvm_dirty_regs = 0;
4290 }
4291
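/*
 * Counterpart of sync_regs_fmt2(): copy the format-2-only register state
 * back from the SIE control block into kvm_run and restore the host's
 * guarded storage setup.
 */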
4292 static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4293 {
4294         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4295         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4296         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4297         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4298         if (MACHINE_HAS_GS) {
4299                 __ctl_set_bit(2, 4);
4300                 if (vcpu->arch.gs_enabled)
4301                         save_gs_cb(current->thread.gs_cb);
4302                 preempt_disable();
4303                 current->thread.gs_cb = vcpu->arch.host_gscb;
4304                 restore_gs_cb(vcpu->arch.host_gscb);
4305                 preempt_enable();
4306                 if (!vcpu->arch.host_gscb)
4307                         __ctl_clear_bit(2, 4);
4308                 vcpu->arch.host_gscb = NULL;
4309         }
4310         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4311 }
4312
4313 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4314 {
4315         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4316         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4317         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4318         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4319         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4320         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4321         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4322         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4323         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4324         save_access_regs(vcpu->run->s.regs.acrs);
4325         restore_access_regs(vcpu->arch.host_acrs);
4326         /* Save guest register state */
4327         save_fpu_regs();
4328         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4329         /* Restore will be done lazily at return */
4330         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4331         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4332         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4333                 store_regs_fmt2(vcpu, kvm_run);
4334 }
4335
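/*
 * The KVM_RUN ioctl: validate the requested sync register fields, make sure
 * the vcpu is started (unless userspace controls the cpu state), sync the
 * registers in, run the vcpu and store the registers back before returning
 * to userspace.
 */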
4336 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4337 {
4338         int rc;
4339
4340         if (kvm_run->immediate_exit)
4341                 return -EINTR;
4342
4343         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4344             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4345                 return -EINVAL;
4346
4347         vcpu_load(vcpu);
4348
4349         if (guestdbg_exit_pending(vcpu)) {
4350                 kvm_s390_prepare_debug_exit(vcpu);
4351                 rc = 0;
4352                 goto out;
4353         }
4354
4355         kvm_sigset_activate(vcpu);
4356
4357         /*
4358          * no need to check the return value of vcpu_start as it can only fail
4359          * for protvirt, and protvirt implies user-controlled cpu state
4360          */
4361         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4362                 kvm_s390_vcpu_start(vcpu);
4363         } else if (is_vcpu_stopped(vcpu)) {
4364                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4365                                    vcpu->vcpu_id);
4366                 rc = -EINVAL;
4367                 goto out;
4368         }
4369
4370         sync_regs(vcpu, kvm_run);
4371         enable_cpu_timer_accounting(vcpu);
4372
4373         might_fault();
4374         rc = __vcpu_run(vcpu);
4375
4376         if (signal_pending(current) && !rc) {
4377                 kvm_run->exit_reason = KVM_EXIT_INTR;
4378                 rc = -EINTR;
4379         }
4380
4381         if (guestdbg_exit_pending(vcpu) && !rc)  {
4382                 kvm_s390_prepare_debug_exit(vcpu);
4383                 rc = 0;
4384         }
4385
4386         if (rc == -EREMOTE) {
4387                 /* userspace support is needed, kvm_run has been prepared */
4388                 rc = 0;
4389         }
4390
4391         disable_cpu_timer_accounting(vcpu);
4392         store_regs(vcpu, kvm_run);
4393
4394         kvm_sigset_deactivate(vcpu);
4395
4396         vcpu->stat.exit_userspace++;
4397 out:
4398         vcpu_put(vcpu);
4399         return rc;
4400 }
4401
4402 /*
4403  * store status at address
4404  * we have two special cases:
4405  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4406  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4407  */
4408 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4409 {
4410         unsigned char archmode = 1;
4411         freg_t fprs[NUM_FPRS];
4412         unsigned int px;
4413         u64 clkcomp, cputm;
4414         int rc;
4415
4416         px = kvm_s390_get_prefix(vcpu);
4417         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4418                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4419                         return -EFAULT;
4420                 gpa = 0;
4421         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4422                 if (write_guest_real(vcpu, 163, &archmode, 1))
4423                         return -EFAULT;
4424                 gpa = px;
4425         } else
4426                 gpa -= __LC_FPREGS_SAVE_AREA;
4427
4428         /* manually convert vector registers if necessary */
4429         if (MACHINE_HAS_VX) {
4430                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4431                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4432                                      fprs, 128);
4433         } else {
4434                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4435                                      vcpu->run->s.regs.fprs, 128);
4436         }
4437         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4438                               vcpu->run->s.regs.gprs, 128);
4439         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4440                               &vcpu->arch.sie_block->gpsw, 16);
4441         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4442                               &px, 4);
4443         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4444                               &vcpu->run->s.regs.fpc, 4);
4445         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4446                               &vcpu->arch.sie_block->todpr, 4);
4447         cputm = kvm_s390_get_cpu_timer(vcpu);
4448         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4449                               &cputm, 8);
4450         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4451         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4452                               &clkcomp, 8);
4453         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4454                               &vcpu->run->s.regs.acrs, 64);
4455         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4456                               &vcpu->arch.sie_block->gcr, 128);
4457         return rc ? -EFAULT : 0;
4458 }
4459
4460 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4461 {
4462         /*
4463          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4464          * switch in the run ioctl. Let's update our copies before we save
4465          * them into the save area
4466          */
4467         save_fpu_regs();
4468         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4469         save_access_regs(vcpu->run->s.regs.acrs);
4470
4471         return kvm_s390_store_status_unloaded(vcpu, addr);
4472 }
4473
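/*
 * IBS (see sclp.has_ibs) is only requested while a single VCPU is started:
 * the start/stop handlers below enable it for a lone running VCPU and
 * revoke it on all VCPUs as soon as a second one is started.
 */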
4474 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4475 {
4476         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4477         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4478 }
4479
4480 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4481 {
4482         unsigned int i;
4483         struct kvm_vcpu *vcpu;
4484
4485         kvm_for_each_vcpu(i, vcpu, kvm) {
4486                 __disable_ibs_on_vcpu(vcpu);
4487         }
4488 }
4489
4490 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4491 {
4492         if (!sclp.has_ibs)
4493                 return;
4494         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4495         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4496 }
4497
4498 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4499 {
4500         int i, online_vcpus, r = 0, started_vcpus = 0;
4501
4502         if (!is_vcpu_stopped(vcpu))
4503                 return 0;
4504
4505         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4506         /* Only one cpu at a time may enter/leave the STOPPED state. */
4507         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4508         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4509
4510         /* Let's tell the UV that we want to change into the operating state */
4511         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4512                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4513                 if (r) {
4514                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4515                         return r;
4516                 }
4517         }
4518
4519         for (i = 0; i < online_vcpus; i++) {
4520                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4521                         started_vcpus++;
4522         }
4523
4524         if (started_vcpus == 0) {
4525                 /* we're the only active VCPU -> speed it up */
4526                 __enable_ibs_on_vcpu(vcpu);
4527         } else if (started_vcpus == 1) {
4528                 /*
4529                  * As we are starting a second VCPU, we have to disable
4530                  * the IBS facility on all VCPUs to remove potentially
4531                  * outstanding ENABLE requests.
4532                  */
4533                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4534         }
4535
4536         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4537         /*
4538          * The real PSW might have changed due to a RESTART interpreted by the
4539          * ultravisor. We block all interrupts and let the next sie exit
4540          * refresh our view.
4541          */
4542         if (kvm_s390_pv_cpu_is_protected(vcpu))
4543                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4544         /*
4545          * Another VCPU might have used IBS while we were offline.
4546          * Let's play safe and flush the VCPU at startup.
4547          */
4548         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4549         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4550         return 0;
4551 }
4552
4553 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4554 {
4555         int i, online_vcpus, r = 0, started_vcpus = 0;
4556         struct kvm_vcpu *started_vcpu = NULL;
4557
4558         if (is_vcpu_stopped(vcpu))
4559                 return 0;
4560
4561         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4562         /* Only one cpu at a time may enter/leave the STOPPED state. */
4563         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4564         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4565
4566         /* Let's tell the UV that we want to change into the stopped state */
4567         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4568                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4569                 if (r) {
4570                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4571                         return r;
4572                 }
4573         }
4574
4575         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4576         kvm_s390_clear_stop_irq(vcpu);
4577
4578         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4579         __disable_ibs_on_vcpu(vcpu);
4580
4581         for (i = 0; i < online_vcpus; i++) {
4582                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4583                         started_vcpus++;
4584                         started_vcpu = vcpu->kvm->vcpus[i];
4585                 }
4586         }
4587
4588         if (started_vcpus == 1) {
4589                 /*
4590                  * As we only have one VCPU left, we want to enable the
4591                  * IBS facility for that VCPU to speed it up.
4592                  */
4593                 __enable_ibs_on_vcpu(started_vcpu);
4594         }
4595
4596         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4597         return 0;
4598 }
4599
4600 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4601                                      struct kvm_enable_cap *cap)
4602 {
4603         int r;
4604
4605         if (cap->flags)
4606                 return -EINVAL;
4607
4608         switch (cap->cap) {
4609         case KVM_CAP_S390_CSS_SUPPORT:
4610                 if (!vcpu->kvm->arch.css_support) {
4611                         vcpu->kvm->arch.css_support = 1;
4612                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4613                         trace_kvm_s390_enable_css(vcpu->kvm);
4614                 }
4615                 r = 0;
4616                 break;
4617         default:
4618                 r = -EINVAL;
4619                 break;
4620         }
4621         return r;
4622 }
4623
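/*
 * Access the vcpu's secure instruction data area (SIDA) on behalf of the
 * KVM_S390_MEM_OP ioctl. Only offset/size pairs that stay within the SIDA
 * are accepted, and no flags are supported for SIDA accesses.
 */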
4624 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4625                                    struct kvm_s390_mem_op *mop)
4626 {
4627         void __user *uaddr = (void __user *)mop->buf;
4628         int r = 0;
4629
4630         if (mop->flags || !mop->size)
4631                 return -EINVAL;
4632         if (mop->size + mop->sida_offset < mop->size)
4633                 return -EINVAL;
4634         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4635                 return -E2BIG;
4636
4637         switch (mop->op) {
4638         case KVM_S390_MEMOP_SIDA_READ:
4639                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4640                                  mop->sida_offset), mop->size))
4641                         r = -EFAULT;
4642
4643                 break;
4644         case KVM_S390_MEMOP_SIDA_WRITE:
4645                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4646                                    mop->sida_offset), uaddr, mop->size))
4647                         r = -EFAULT;
4648                 break;
4649         }
4650         return r;
4651 }
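
/*
 * Read from or write to guest logical memory for KVM_S390_MEM_OP. With
 * KVM_S390_MEMOP_F_CHECK_ONLY only the access is validated; otherwise the
 * data is bounced through a temporary kernel buffer. Not available for
 * protected guests.
 */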
4652 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4653                                   struct kvm_s390_mem_op *mop)
4654 {
4655         void __user *uaddr = (void __user *)mop->buf;
4656         void *tmpbuf = NULL;
4657         int r = 0;
4658         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4659                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4660
4661         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4662                 return -EINVAL;
4663
4664         if (mop->size > MEM_OP_MAX_SIZE)
4665                 return -E2BIG;
4666
4667         if (kvm_s390_pv_cpu_is_protected(vcpu))
4668                 return -EINVAL;
4669
4670         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4671                 tmpbuf = vmalloc(mop->size);
4672                 if (!tmpbuf)
4673                         return -ENOMEM;
4674         }
4675
4676         switch (mop->op) {
4677         case KVM_S390_MEMOP_LOGICAL_READ:
4678                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4679                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4680                                             mop->size, GACC_FETCH);
4681                         break;
4682                 }
4683                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4684                 if (r == 0) {
4685                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4686                                 r = -EFAULT;
4687                 }
4688                 break;
4689         case KVM_S390_MEMOP_LOGICAL_WRITE:
4690                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4691                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4692                                             mop->size, GACC_STORE);
4693                         break;
4694                 }
4695                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4696                         r = -EFAULT;
4697                         break;
4698                 }
4699                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4700                 break;
4701         }
4702
4703         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4704                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4705
4706         vfree(tmpbuf);
4707         return r;
4708 }
4709
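/*
 * Dispatch a KVM_S390_MEM_OP request to the logical memory or SIDA handler
 * under kvm->srcu.
 *
 * Purely illustrative userspace sketch (not part of this file; the field
 * names are assumed to match struct kvm_s390_mem_op in the uapi header):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_logical_addr,
 *		.size  = length,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)local_buffer,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		perror("KVM_S390_MEM_OP");
 */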
4710 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4711                                       struct kvm_s390_mem_op *mop)
4712 {
4713         int r, srcu_idx;
4714
4715         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4716
4717         switch (mop->op) {
4718         case KVM_S390_MEMOP_LOGICAL_READ:
4719         case KVM_S390_MEMOP_LOGICAL_WRITE:
4720                 r = kvm_s390_guest_mem_op(vcpu, mop);
4721                 break;
4722         case KVM_S390_MEMOP_SIDA_READ:
4723         case KVM_S390_MEMOP_SIDA_WRITE:
4724                 /* the vcpu->mutex protects us against the sida going away */
4725                 r = kvm_s390_guest_sida_op(vcpu, mop);
4726                 break;
4727         default:
4728                 r = -EINVAL;
4729         }
4730
4731         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4732         return r;
4733 }
4734
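/*
 * Asynchronous vcpu ioctls (interrupt injection) are handled here without
 * going through vcpu_load(), in contrast to kvm_arch_vcpu_ioctl() below.
 */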
4735 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4736                                unsigned int ioctl, unsigned long arg)
4737 {
4738         struct kvm_vcpu *vcpu = filp->private_data;
4739         void __user *argp = (void __user *)arg;
4740
4741         switch (ioctl) {
4742         case KVM_S390_IRQ: {
4743                 struct kvm_s390_irq s390irq;
4744
4745                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4746                         return -EFAULT;
4747                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4748         }
4749         case KVM_S390_INTERRUPT: {
4750                 struct kvm_s390_interrupt s390int;
4751                 struct kvm_s390_irq s390irq = {};
4752
4753                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4754                         return -EFAULT;
4755                 if (s390int_to_s390irq(&s390int, &s390irq))
4756                         return -EINVAL;
4757                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4758         }
4759         }
4760         return -ENOIOCTLCMD;
4761 }
4762
4763 long kvm_arch_vcpu_ioctl(struct file *filp,
4764                          unsigned int ioctl, unsigned long arg)
4765 {
4766         struct kvm_vcpu *vcpu = filp->private_data;
4767         void __user *argp = (void __user *)arg;
4768         int idx;
4769         long r;
4770         u16 rc, rrc;
4771
4772         vcpu_load(vcpu);
4773
4774         switch (ioctl) {
4775         case KVM_S390_STORE_STATUS:
4776                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4777                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4778                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4779                 break;
4780         case KVM_S390_SET_INITIAL_PSW: {
4781                 psw_t psw;
4782
4783                 r = -EFAULT;
4784                 if (copy_from_user(&psw, argp, sizeof(psw)))
4785                         break;
4786                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4787                 break;
4788         }
4789         case KVM_S390_CLEAR_RESET:
4790                 r = 0;
4791                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4792                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4793                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4794                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4795                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4796                                    rc, rrc);
4797                 }
4798                 break;
4799         case KVM_S390_INITIAL_RESET:
4800                 r = 0;
4801                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4802                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4803                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4804                                           UVC_CMD_CPU_RESET_INITIAL,
4805                                           &rc, &rrc);
4806                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4807                                    rc, rrc);
4808                 }
4809                 break;
4810         case KVM_S390_NORMAL_RESET:
4811                 r = 0;
4812                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4813                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4814                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4815                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4816                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4817                                    rc, rrc);
4818                 }
4819                 break;
4820         case KVM_SET_ONE_REG:
4821         case KVM_GET_ONE_REG: {
4822                 struct kvm_one_reg reg;
4823                 r = -EINVAL;
4824                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4825                         break;
4826                 r = -EFAULT;
4827                 if (copy_from_user(&reg, argp, sizeof(reg)))
4828                         break;
4829                 if (ioctl == KVM_SET_ONE_REG)
4830                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4831                 else
4832                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4833                 break;
4834         }
4835 #ifdef CONFIG_KVM_S390_UCONTROL
4836         case KVM_S390_UCAS_MAP: {
4837                 struct kvm_s390_ucas_mapping ucasmap;
4838
4839                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4840                         r = -EFAULT;
4841                         break;
4842                 }
4843
4844                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4845                         r = -EINVAL;
4846                         break;
4847                 }
4848
4849                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4850                                      ucasmap.vcpu_addr, ucasmap.length);
4851                 break;
4852         }
4853         case KVM_S390_UCAS_UNMAP: {
4854                 struct kvm_s390_ucas_mapping ucasmap;
4855
4856                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4857                         r = -EFAULT;
4858                         break;
4859                 }
4860
4861                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4862                         r = -EINVAL;
4863                         break;
4864                 }
4865
4866                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4867                         ucasmap.length);
4868                 break;
4869         }
4870 #endif
4871         case KVM_S390_VCPU_FAULT: {
4872                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4873                 break;
4874         }
4875         case KVM_ENABLE_CAP:
4876         {
4877                 struct kvm_enable_cap cap;
4878                 r = -EFAULT;
4879                 if (copy_from_user(&cap, argp, sizeof(cap)))
4880                         break;
4881                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4882                 break;
4883         }
4884         case KVM_S390_MEM_OP: {
4885                 struct kvm_s390_mem_op mem_op;
4886
4887                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4888                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4889                 else
4890                         r = -EFAULT;
4891                 break;
4892         }
4893         case KVM_S390_SET_IRQ_STATE: {
4894                 struct kvm_s390_irq_state irq_state;
4895
4896                 r = -EFAULT;
4897                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4898                         break;
4899                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4900                     irq_state.len == 0 ||
4901                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4902                         r = -EINVAL;
4903                         break;
4904                 }
4905                 /* do not use irq_state.flags, it will break old QEMUs */
4906                 r = kvm_s390_set_irq_state(vcpu,
4907                                            (void __user *) irq_state.buf,
4908                                            irq_state.len);
4909                 break;
4910         }
4911         case KVM_S390_GET_IRQ_STATE: {
4912                 struct kvm_s390_irq_state irq_state;
4913
4914                 r = -EFAULT;
4915                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4916                         break;
4917                 if (irq_state.len == 0) {
4918                         r = -EINVAL;
4919                         break;
4920                 }
4921                 /* do not use irq_state.flags, it will break old QEMUs */
4922                 r = kvm_s390_get_irq_state(vcpu,
4923                                            (__u8 __user *)  irq_state.buf,
4924                                            irq_state.len);
4925                 break;
4926         }
4927         default:
4928                 r = -ENOTTY;
4929         }
4930
4931         vcpu_put(vcpu);
4932         return r;
4933 }
4934
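/*
 * mmap handler for the vcpu fd: for user controlled virtual machines the
 * SIE control block can be mapped at KVM_S390_SIE_PAGE_OFFSET; everything
 * else gets SIGBUS.
 */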
4935 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4936 {
4937 #ifdef CONFIG_KVM_S390_UCONTROL
4938         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4939                  && (kvm_is_ucontrol(vcpu->kvm))) {
4940                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4941                 get_page(vmf->page);
4942                 return 0;
4943         }
4944 #endif
4945         return VM_FAULT_SIGBUS;
4946 }
4947
4948 /* Section: memory related */
4949 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4950                                    struct kvm_memory_slot *memslot,
4951                                    const struct kvm_userspace_memory_region *mem,
4952                                    enum kvm_mr_change change)
4953 {
4954         /* A few sanity checks. Memory slots have to start and end at a segment
4955            boundary (1MB). The memory in userland may be fragmented into various
4956            different vmas. It is okay to mmap() and munmap() stuff in this slot
4957            at any time after doing this call */
4958
4959         if (mem->userspace_addr & 0xffffful)
4960                 return -EINVAL;
4961
4962         if (mem->memory_size & 0xffffful)
4963                 return -EINVAL;
4964
4965         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4966                 return -EINVAL;
4967
4968         /* When we are protected, we should not change the memory slots */
4969         if (kvm_s390_pv_get_handle(kvm))
4970                 return -EINVAL;
4971         return 0;
4972 }
4973
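/*
 * Mirror memslot changes into the guest address space: deleted or moved
 * slots are unmapped from the gmap, created or moved slots are (re)mapped
 * at their new location.
 */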
4974 void kvm_arch_commit_memory_region(struct kvm *kvm,
4975                                 const struct kvm_userspace_memory_region *mem,
4976                                 struct kvm_memory_slot *old,
4977                                 const struct kvm_memory_slot *new,
4978                                 enum kvm_mr_change change)
4979 {
4980         int rc = 0;
4981
4982         switch (change) {
4983         case KVM_MR_DELETE:
4984                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4985                                         old->npages * PAGE_SIZE);
4986                 break;
4987         case KVM_MR_MOVE:
4988                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4989                                         old->npages * PAGE_SIZE);
4990                 if (rc)
4991                         break;
4992                 fallthrough;
4993         case KVM_MR_CREATE:
4994                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4995                                       mem->guest_phys_addr, mem->memory_size);
4996                 break;
4997         case KVM_MR_FLAGS_ONLY:
4998                 break;
4999         default:
5000                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5001         }
5002         if (rc)
5003                 pr_warn("failed to commit memory region\n");
5004         return;
5005 }
5006
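/*
 * Derive the non-hypervisor-managed facility mask for facility word i:
 * take the 2-bit indication for word i from sclp.hmfai (most significant
 * field first) and shift the 48-bit base mask 0x0000ffffffffffff right by
 * 16 bits per step, i.e. an indication of 0 keeps 48 facility bits of the
 * word, 1 keeps 32, 2 keeps 16 and 3 keeps none.
 */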
5007 static inline unsigned long nonhyp_mask(int i)
5008 {
5009         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5010
5011         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5012 }
5013
5014 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5015 {
5016         vcpu->valid_wakeup = false;
5017 }
5018
5019 static int __init kvm_s390_init(void)
5020 {
5021         int i;
5022
5023         if (!sclp.has_sief2) {
5024                 pr_info("SIE is not available\n");
5025                 return -ENODEV;
5026         }
5027
5028         if (nested && hpage) {
5029                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5030                 return -EINVAL;
5031         }
5032
5033         for (i = 0; i < 16; i++)
5034                 kvm_s390_fac_base[i] |=
5035                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5036
5037         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5038 }
5039
5040 static void __exit kvm_s390_exit(void)
5041 {
5042         kvm_exit();
5043 }
5044
5045 module_init(kvm_s390_init);
5046 module_exit(kvm_s390_exit);
5047
5048 /*
5049  * Enable autoloading of the kvm module.
5050  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5051  * since x86 takes a different approach.
5052  */
5053 #include <linux/miscdevice.h>
5054 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5055 MODULE_ALIAS("devname:kvm");