// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
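/*
 * Illustrative userspace sketch (added; not part of the original file):
 * a single KVM_S390_MEM_OP call transfers at most MEM_OP_MAX_SIZE bytes.
 * guest_addr, len, buffer and vcpu_fd below are placeholders:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(uintptr_t)buffer,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */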
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))
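/*
 * Note (added for clarity): VCPU_IRQS_MAX_BUF sizes the buffer used by the
 * KVM_S390_GET_IRQ_STATE/KVM_S390_SET_IRQ_STATE vcpu ioctls; it has to hold
 * the architectural maximum of pending local interrupts plus, potentially,
 * one pending signal from every other vcpu (hence the KVM_MAX_VCPUS term).
 */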

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
        KVM_GENERIC_VM_STATS(),
        STATS_DESC_COUNTER(VM, inject_io),
        STATS_DESC_COUNTER(VM, inject_float_mchk),
        STATS_DESC_COUNTER(VM, inject_pfault_done),
        STATS_DESC_COUNTER(VM, inject_service_signal),
        STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
        KVM_GENERIC_VCPU_STATS(),
        STATS_DESC_COUNTER(VCPU, exit_userspace),
        STATS_DESC_COUNTER(VCPU, exit_null),
        STATS_DESC_COUNTER(VCPU, exit_external_request),
        STATS_DESC_COUNTER(VCPU, exit_io_request),
        STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
        STATS_DESC_COUNTER(VCPU, exit_stop_request),
        STATS_DESC_COUNTER(VCPU, exit_validity),
        STATS_DESC_COUNTER(VCPU, exit_instruction),
        STATS_DESC_COUNTER(VCPU, exit_pei),
        STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
        STATS_DESC_COUNTER(VCPU, instruction_lctl),
        STATS_DESC_COUNTER(VCPU, instruction_lctlg),
        STATS_DESC_COUNTER(VCPU, instruction_stctl),
        STATS_DESC_COUNTER(VCPU, instruction_stctg),
        STATS_DESC_COUNTER(VCPU, exit_program_interruption),
        STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
        STATS_DESC_COUNTER(VCPU, exit_operation_exception),
        STATS_DESC_COUNTER(VCPU, deliver_ckc),
        STATS_DESC_COUNTER(VCPU, deliver_cputm),
        STATS_DESC_COUNTER(VCPU, deliver_external_call),
        STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
        STATS_DESC_COUNTER(VCPU, deliver_service_signal),
        STATS_DESC_COUNTER(VCPU, deliver_virtio),
        STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
        STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
        STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
        STATS_DESC_COUNTER(VCPU, deliver_program),
        STATS_DESC_COUNTER(VCPU, deliver_io),
        STATS_DESC_COUNTER(VCPU, deliver_machine_check),
        STATS_DESC_COUNTER(VCPU, exit_wait_state),
        STATS_DESC_COUNTER(VCPU, inject_ckc),
        STATS_DESC_COUNTER(VCPU, inject_cputm),
        STATS_DESC_COUNTER(VCPU, inject_external_call),
        STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
        STATS_DESC_COUNTER(VCPU, inject_mchk),
        STATS_DESC_COUNTER(VCPU, inject_pfault_init),
        STATS_DESC_COUNTER(VCPU, inject_program),
        STATS_DESC_COUNTER(VCPU, inject_restart),
        STATS_DESC_COUNTER(VCPU, inject_set_prefix),
        STATS_DESC_COUNTER(VCPU, inject_stop_signal),
        STATS_DESC_COUNTER(VCPU, instruction_epsw),
        STATS_DESC_COUNTER(VCPU, instruction_gs),
        STATS_DESC_COUNTER(VCPU, instruction_io_other),
        STATS_DESC_COUNTER(VCPU, instruction_lpsw),
        STATS_DESC_COUNTER(VCPU, instruction_lpswe),
        STATS_DESC_COUNTER(VCPU, instruction_pfmf),
        STATS_DESC_COUNTER(VCPU, instruction_ptff),
        STATS_DESC_COUNTER(VCPU, instruction_sck),
        STATS_DESC_COUNTER(VCPU, instruction_sckpf),
        STATS_DESC_COUNTER(VCPU, instruction_stidp),
        STATS_DESC_COUNTER(VCPU, instruction_spx),
        STATS_DESC_COUNTER(VCPU, instruction_stpx),
        STATS_DESC_COUNTER(VCPU, instruction_stap),
        STATS_DESC_COUNTER(VCPU, instruction_iske),
        STATS_DESC_COUNTER(VCPU, instruction_ri),
        STATS_DESC_COUNTER(VCPU, instruction_rrbe),
        STATS_DESC_COUNTER(VCPU, instruction_sske),
        STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
        STATS_DESC_COUNTER(VCPU, instruction_stsi),
        STATS_DESC_COUNTER(VCPU, instruction_stfl),
        STATS_DESC_COUNTER(VCPU, instruction_tb),
        STATS_DESC_COUNTER(VCPU, instruction_tpi),
        STATS_DESC_COUNTER(VCPU, instruction_tprot),
        STATS_DESC_COUNTER(VCPU, instruction_tsch),
        STATS_DESC_COUNTER(VCPU, instruction_sie),
        STATS_DESC_COUNTER(VCPU, instruction_essa),
        STATS_DESC_COUNTER(VCPU, instruction_sthyi),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
        STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
        STATS_DESC_COUNTER(VCPU, diag_9c_forward),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
        STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");
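/*
 * Example (hypothetical invocation, added for clarity): both switches above
 * are module parameters, so nested virtualization can be turned on with
 * "modprobe kvm nested=1", or with kvm.nested=1 on the kernel command line
 * when kvm is built in. Per the comments above, hpage=1 is only honored
 * when nested virtualization is off.
 */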

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa  = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, it will require code changes, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines the default mask for facilities. It consists
 * of the defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
        BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
                sizeof(stfle_fac_list));

        return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
        return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
                              unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
        u8 delta_idx = 0;

        /*
         * The TOD jumps by delta; we have to compensate by adding -delta
         * to the epoch.
         */
        delta = -delta;

        /* sign-extension - we're adding to signed values below */
        if ((s64)delta < 0)
                delta_idx = -1;

        scb->epoch += delta;
        if (scb->ecd & ECD_MEF) {
                scb->epdx += delta_idx;
                if (scb->epoch < delta)
                        scb->epdx += 1;
        }
}
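/*
 * Worked example (added for illustration): epoch (low 64 bits) and epdx
 * (high bits) act as one signed 128-bit epoch. If the TOD jumps forward
 * by 1, delta becomes -1 (0xffffffffffffffff) and delta_idx becomes -1.
 * After scb->epoch += delta, the carry test (scb->epoch < delta) is true
 * unless the old epoch was 0, so epdx usually sees -1 + 1 = 0 net change;
 * only when the low word underflows (old epoch == 0) does the borrow
 * propagate and leave epdx decremented by one.
 */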

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        unsigned long i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
        unsigned long function = (unsigned long)nr | 0x100;
        int cc;

        asm volatile(
                "       lgr     0,%[function]\n"
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : [function] "d" (function)
                : "cc", "0");
        return cc == 0;
}
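/*
 * Note (added for clarity): ORing 0x100 into the PLO function code selects
 * the "test bit" form of the instruction, which merely asks whether that
 * function is installed; condition code 0 means it is available.
 */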

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
        asm volatile(
                "       lghi    0,0\n"
                "       lgr     1,%[query]\n"
                /* Parameter registers are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
                :
                : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
                : "cc", "memory", "0", "1");
}
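/*
 * Note (added for clarity): for these queries, general register 0 holds the
 * function code (0 selects the query function) and general register 1 the
 * address of the parameter block that receives the availability mask; this
 * is how the SORTL and DFLTCC subfunction masks below are filled in.
 */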

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (test_facility(155)) /* MSA9 */
                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kdsa);

        if (test_facility(150)) /* SORTL */
                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

        if (test_facility(151)) /* DFLTCC */
                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
         * pages to be detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        int rc = -ENOMEM;

        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf_uv)
                goto out;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
            debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
                goto out;

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
        if (rc) {
                pr_err("A FLIC registration call failed with rc=%d\n", rc);
                goto out;
        }

        rc = kvm_s390_gib_init(GAL_ISC);
        if (rc)
                goto out;

        return 0;

out:
        kvm_arch_exit();
        return rc;
}

void kvm_arch_exit(void)
{
        kvm_s390_gib_destroy();
        debug_unregister(kvm_s390_dbf);
        debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
        case KVM_CAP_SET_GUEST_DEBUG:
        case KVM_CAP_S390_DIAG318:
        case KVM_CAP_S390_MEM_OP_EXTENSION:
                r = 1;
                break;
        case KVM_CAP_SET_GUEST_DEBUG2:
                r = KVM_GUESTDBG_VALID_MASK;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
        case KVM_CAP_MAX_VCPU_ID:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                if (ext == KVM_CAP_NR_VCPUS)
                        r = min_t(unsigned int, num_online_cpus(), r);
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        case KVM_CAP_S390_PROTECTED:
                r = is_prot_virt_host();
                break;
        default:
                r = 0;
        }
        return r;
}
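/*
 * Illustrative userspace sketch (added; not part of the original file):
 * capabilities are probed with the KVM_CHECK_EXTENSION ioctl, whose return
 * value is the "r" computed above. vm_fd is a placeholder:
 *
 *	int max_size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * For KVM_CAP_S390_MEM_OP a positive result is the maximum transfer size.
 */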

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned long i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        if (test_facility(148)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
                                set_kvm_facility(kvm->arch.model.fac_list, 148);
                        }
                        if (test_facility(152)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
                                set_kvm_facility(kvm->arch.model.fac_list, 152);
                        }
                        if (test_facility(192)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 192);
                                set_kvm_facility(kvm->arch.model.fac_list, 192);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;
                        mmap_write_lock(kvm->mm);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        mmap_write_unlock(kvm->mm);
                        /*
                         * We might have to create fake 4k page
                         * tables. To prevent the hardware from working
                         * on stale PGSTEs, we emulate these
                         * instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}
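/*
 * Illustrative userspace sketch (added; not part of the original file):
 * the VM capabilities handled above are enabled via KVM_ENABLE_CAP with
 * cap->flags set to zero. vm_fd is a placeholder:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */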

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* Not compatible with cmma. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        unsigned long i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                /* recreate the shadow crycb by leaving the VSIE handler */
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 0;
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}
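/*
 * Illustrative userspace sketch (added; not part of the original file):
 * the crypto controls above are reached through the KVM_SET_DEVICE_ATTR
 * vm ioctl. vm_fd is a placeholder:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */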

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        unsigned long cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int bkt;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || kvm_memslots_empty(slots))
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        kvm_for_each_memslot(ms, bkt, slots) {
                if (!ms->dirty_bitmap)
                        return -EINVAL;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
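/*
 * Illustrative userspace sketch (added; not part of the original file):
 * the full epoch-extended TOD is set via the KVM_S390_VM_TOD_EXT attribute
 * of the KVM_S390_VM_TOD group. vm_fd and tod_value are placeholders; a
 * nonzero epoch_idx is rejected above unless facility 139 is available:
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .epoch_idx = 0, .tod = tod_value };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)(uintptr_t)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */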

static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        union tod_clock clk;

        preempt_disable();

        store_tod_clock_ext(&clk);

        gtod->tod = clk.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = clk.ei + kvm->arch.epdx;
                if (gtod->tod < clk.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        u16 lowest_ibc, unblocked_ibc;
        int ret = 0;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                ret = -EBUSY;
                goto out;
        }
        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        if (!copy_from_user(proc, (void __user *)attr->addr,
                            sizeof(*proc))) {
                kvm->arch.model.cpuid = proc->cpuid;
                lowest_ibc = sclp.ibc >> 16 & 0xfff;
                unblocked_ibc = sclp.ibc & 0xfff;
                if (lowest_ibc && proc->ibc) {
                        if (proc->ibc > unblocked_ibc)
                                kvm->arch.model.ibc = unblocked_ibc;
                        else if (proc->ibc < lowest_ibc)
                                kvm->arch.model.ibc = lowest_ibc;
                        else
                                kvm->arch.model.ibc = proc->ibc;
                }
                memcpy(kvm->arch.model.fac_list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
                VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
                         kvm->arch.model.ibc,
                         kvm->arch.model.cpuid);
                VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
                         kvm->arch.model.fac_list[0],
                         kvm->arch.model.fac_list[1],
                         kvm->arch.model.fac_list[2]);
        } else
                ret = -EFAULT;
        kfree(proc);
out:
        mutex_unlock(&kvm->lock);
        return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
                                       struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_feat data;

        if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
                return -EFAULT;
        if (!bitmap_subset((unsigned long *) data.feat,
                           kvm_s390_available_cpu_feat,
                           KVM_S390_VM_CPU_FEAT_NR_BITS))
                return -EINVAL;

        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                mutex_unlock(&kvm->lock);
                return -EBUSY;
        }
        bitmap_from_arr64(kvm->arch.cpu_feat, data.feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
        mutex_unlock(&kvm->lock);
        VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
                         data.feat[0],
                         data.feat[1],
                         data.feat[2]);
        return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
                                          struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        if (kvm->created_vcpus) {
                mutex_unlock(&kvm->lock);
                return -EBUSY;
        }

        if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
                           sizeof(struct kvm_s390_vm_cpu_subfunc))) {
                mutex_unlock(&kvm->lock);
                return -EFAULT;
        }
        mutex_unlock(&kvm->lock);

        VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
                 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
        VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
        VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
        VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
        VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
        VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
        VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
        VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
        VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
        VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
        VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
                 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
        VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
                 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

        return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret = -ENXIO;

        switch (attr->attr) {
        case KVM_S390_VM_CPU_PROCESSOR:
                ret = kvm_s390_set_processor(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_FEAT:
                ret = kvm_s390_set_processor_feat(kvm, attr);
                break;
        case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
                ret = kvm_s390_set_processor_subfunc(kvm, attr);
                break;
        }
        return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_cpu_processor *proc;
        int ret = 0;

        proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
        if (!proc) {
                ret = -ENOMEM;
                goto out;
        }
        proc->cpuid = kvm->arch.model.cpuid;
        proc->ibc = kvm->arch.model.ibc;
        memcpy(&proc->fac_list, kvm->arch.model.fac_list,
               S390_ARCH_FAC_LIST_SIZE_BYTE);
        VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1454                  kvm->arch.model.ibc,
1455                  kvm->arch.model.cpuid);
1456         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1457                  kvm->arch.model.fac_list[0],
1458                  kvm->arch.model.fac_list[1],
1459                  kvm->arch.model.fac_list[2]);
1460         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1461                 ret = -EFAULT;
1462         kfree(proc);
1463 out:
1464         return ret;
1465 }
1466
1467 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1468 {
1469         struct kvm_s390_vm_cpu_machine *mach;
1470         int ret = 0;
1471
1472         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1473         if (!mach) {
1474                 ret = -ENOMEM;
1475                 goto out;
1476         }
1477         get_cpu_id((struct cpuid *) &mach->cpuid);
1478         mach->ibc = sclp.ibc;
1479         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1480                S390_ARCH_FAC_LIST_SIZE_BYTE);
1481         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1482                sizeof(stfle_fac_list));
1483         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1484                  kvm->arch.model.ibc,
1485                  kvm->arch.model.cpuid);
1486         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1487                  mach->fac_mask[0],
1488                  mach->fac_mask[1],
1489                  mach->fac_mask[2]);
1490         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1491                  mach->fac_list[0],
1492                  mach->fac_list[1],
1493                  mach->fac_list[2]);
1494         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1495                 ret = -EFAULT;
1496         kfree(mach);
1497 out:
1498         return ret;
1499 }
1500
1501 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1502                                        struct kvm_device_attr *attr)
1503 {
1504         struct kvm_s390_vm_cpu_feat data;
1505
1506         bitmap_to_arr64(data.feat, kvm->arch.cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1507         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1508                 return -EFAULT;
1509         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1510                          data.feat[0],
1511                          data.feat[1],
1512                          data.feat[2]);
1513         return 0;
1514 }
1515
1516 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1517                                      struct kvm_device_attr *attr)
1518 {
1519         struct kvm_s390_vm_cpu_feat data;
1520
1521         bitmap_to_arr64(data.feat, kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
1522         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1523                 return -EFAULT;
1524         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1525                          data.feat[0],
1526                          data.feat[1],
1527                          data.feat[2]);
1528         return 0;
1529 }
1530
1531 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1532                                           struct kvm_device_attr *attr)
1533 {
1534         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1535             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1536                 return -EFAULT;
1537
1538         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1543         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1546         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1549         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1552         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1555         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1558         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1561         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1562                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1564         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1565                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1567         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1568                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1570         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1573         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1575                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1576         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1577                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1578                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1579         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1580                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1581                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1582         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1583                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1584                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1585         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1586                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1587                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1588                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1589                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1590         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1591                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1592                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1593                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1594                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1595
1596         return 0;
1597 }
1598
1599 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1600                                         struct kvm_device_attr *attr)
1601 {
1602         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1603             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1604                 return -EFAULT;
1605
1606         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1607                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1608                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1609                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1610                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1611         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1612                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1614         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1617         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1620         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1623         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1624                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1625                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1626         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1627                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1629         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1630                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1631                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1632         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1633                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1634                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1635         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1636                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1637                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1638         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1639                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1640                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1641         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1642                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1643                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1644         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1645                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1646                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1647         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1648                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1649                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1650         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1651                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1652                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1653         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1654                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1655                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1656                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1657                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1658         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1659                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1660                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1661                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1662                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1663
1664         return 0;
1665 }
1666
1667 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1668 {
1669         int ret = -ENXIO;
1670
1671         switch (attr->attr) {
1672         case KVM_S390_VM_CPU_PROCESSOR:
1673                 ret = kvm_s390_get_processor(kvm, attr);
1674                 break;
1675         case KVM_S390_VM_CPU_MACHINE:
1676                 ret = kvm_s390_get_machine(kvm, attr);
1677                 break;
1678         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1679                 ret = kvm_s390_get_processor_feat(kvm, attr);
1680                 break;
1681         case KVM_S390_VM_CPU_MACHINE_FEAT:
1682                 ret = kvm_s390_get_machine_feat(kvm, attr);
1683                 break;
1684         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1685                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1686                 break;
1687         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1688                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1689                 break;
1690         }
1691         return ret;
1692 }
1693
1694 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1695 {
1696         int ret;
1697
1698         switch (attr->group) {
1699         case KVM_S390_VM_MEM_CTRL:
1700                 ret = kvm_s390_set_mem_control(kvm, attr);
1701                 break;
1702         case KVM_S390_VM_TOD:
1703                 ret = kvm_s390_set_tod(kvm, attr);
1704                 break;
1705         case KVM_S390_VM_CPU_MODEL:
1706                 ret = kvm_s390_set_cpu_model(kvm, attr);
1707                 break;
1708         case KVM_S390_VM_CRYPTO:
1709                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1710                 break;
1711         case KVM_S390_VM_MIGRATION:
1712                 ret = kvm_s390_vm_set_migration(kvm, attr);
1713                 break;
1714         default:
1715                 ret = -ENXIO;
1716                 break;
1717         }
1718
1719         return ret;
1720 }
1721
1722 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1723 {
1724         int ret;
1725
1726         switch (attr->group) {
1727         case KVM_S390_VM_MEM_CTRL:
1728                 ret = kvm_s390_get_mem_control(kvm, attr);
1729                 break;
1730         case KVM_S390_VM_TOD:
1731                 ret = kvm_s390_get_tod(kvm, attr);
1732                 break;
1733         case KVM_S390_VM_CPU_MODEL:
1734                 ret = kvm_s390_get_cpu_model(kvm, attr);
1735                 break;
1736         case KVM_S390_VM_MIGRATION:
1737                 ret = kvm_s390_vm_get_migration(kvm, attr);
1738                 break;
1739         default:
1740                 ret = -ENXIO;
1741                 break;
1742         }
1743
1744         return ret;
1745 }
1746
1747 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1748 {
1749         int ret;
1750
1751         switch (attr->group) {
1752         case KVM_S390_VM_MEM_CTRL:
1753                 switch (attr->attr) {
1754                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1755                 case KVM_S390_VM_MEM_CLR_CMMA:
1756                         ret = sclp.has_cmma ? 0 : -ENXIO;
1757                         break;
1758                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1759                         ret = 0;
1760                         break;
1761                 default:
1762                         ret = -ENXIO;
1763                         break;
1764                 }
1765                 break;
1766         case KVM_S390_VM_TOD:
1767                 switch (attr->attr) {
1768                 case KVM_S390_VM_TOD_LOW:
1769                 case KVM_S390_VM_TOD_HIGH:
1770                         ret = 0;
1771                         break;
1772                 default:
1773                         ret = -ENXIO;
1774                         break;
1775                 }
1776                 break;
1777         case KVM_S390_VM_CPU_MODEL:
1778                 switch (attr->attr) {
1779                 case KVM_S390_VM_CPU_PROCESSOR:
1780                 case KVM_S390_VM_CPU_MACHINE:
1781                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1782                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1783                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1784                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1785                         ret = 0;
1786                         break;
1787                 default:
1788                         ret = -ENXIO;
1789                         break;
1790                 }
1791                 break;
1792         case KVM_S390_VM_CRYPTO:
1793                 switch (attr->attr) {
1794                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1795                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1796                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1797                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1798                         ret = 0;
1799                         break;
1800                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1801                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1802                         ret = ap_instructions_available() ? 0 : -ENXIO;
1803                         break;
1804                 default:
1805                         ret = -ENXIO;
1806                         break;
1807                 }
1808                 break;
1809         case KVM_S390_VM_MIGRATION:
1810                 ret = 0;
1811                 break;
1812         default:
1813                 ret = -ENXIO;
1814                 break;
1815         }
1816
1817         return ret;
1818 }
1819
1820 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1821 {
1822         uint8_t *keys;
1823         uint64_t hva;
1824         int srcu_idx, i, r = 0;
1825
1826         if (args->flags != 0)
1827                 return -EINVAL;
1828
1829         /* Is this guest using storage keys? */
1830         if (!mm_uses_skeys(current->mm))
1831                 return KVM_S390_GET_SKEYS_NONE;
1832
1833         /* Enforce sane limit on memory allocation */
1834         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1835                 return -EINVAL;
1836
1837         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1838         if (!keys)
1839                 return -ENOMEM;
1840
1841         mmap_read_lock(current->mm);
1842         srcu_idx = srcu_read_lock(&kvm->srcu);
1843         for (i = 0; i < args->count; i++) {
1844                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1845                 if (kvm_is_error_hva(hva)) {
1846                         r = -EFAULT;
1847                         break;
1848                 }
1849
1850                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1851                 if (r)
1852                         break;
1853         }
1854         srcu_read_unlock(&kvm->srcu, srcu_idx);
1855         mmap_read_unlock(current->mm);
1856
1857         if (!r) {
1858                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1859                                  sizeof(uint8_t) * args->count);
1860                 if (r)
1861                         r = -EFAULT;
1862         }
1863
1864         kvfree(keys);
1865         return r;
1866 }
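/*
 * Illustrative userspace sketch (an assumption, not part of this file):
 * fetching the storage keys of the first 256 guest frames through the
 * KVM_S390_GET_SKEYS vm ioctl; "vm_fd" is a hypothetical VM file
 * descriptor.
 *
 *	uint8_t keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = 256,
 *		.skeydata_addr = (__u64)(uintptr_t)keys,
 *	};
 *	long r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *	if (r == KVM_S390_GET_SKEYS_NONE)
 *		puts("guest does not use storage keys");
 *	else if (r < 0)
 *		perror("KVM_S390_GET_SKEYS");
 */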
1867
1868 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1869 {
1870         uint8_t *keys;
1871         uint64_t hva;
1872         int srcu_idx, i, r = 0;
1873         bool unlocked;
1874
1875         if (args->flags != 0)
1876                 return -EINVAL;
1877
1878         /* Enforce sane limit on memory allocation */
1879         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1880                 return -EINVAL;
1881
1882         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1883         if (!keys)
1884                 return -ENOMEM;
1885
1886         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1887                            sizeof(uint8_t) * args->count);
1888         if (r) {
1889                 r = -EFAULT;
1890                 goto out;
1891         }
1892
1893         /* Enable storage key handling for the guest */
1894         r = s390_enable_skey();
1895         if (r)
1896                 goto out;
1897
1898         i = 0;
1899         mmap_read_lock(current->mm);
1900         srcu_idx = srcu_read_lock(&kvm->srcu);
1901         while (i < args->count) {
1902                 unlocked = false;
1903                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1904                 if (kvm_is_error_hva(hva)) {
1905                         r = -EFAULT;
1906                         break;
1907                 }
1908
1909                 /* Lowest order bit is reserved */
1910                 if (keys[i] & 0x01) {
1911                         r = -EINVAL;
1912                         break;
1913                 }
1914
1915                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1916                 if (r) {
1917                         r = fixup_user_fault(current->mm, hva,
1918                                              FAULT_FLAG_WRITE, &unlocked);
1919                         if (r)
1920                                 break;
1921                 }
1922                 if (!r)
1923                         i++;
1924         }
1925         srcu_read_unlock(&kvm->srcu, srcu_idx);
1926         mmap_read_unlock(current->mm);
1927 out:
1928         kvfree(keys);
1929         return r;
1930 }
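/*
 * Illustrative counterpart sketch (an assumption): writing keys back with
 * KVM_S390_SET_SKEYS reuses the same struct; each key byte must have the
 * reserved lowest-order bit clear, or the ioctl fails with -EINVAL, as
 * checked above.
 *
 *	// keys[] and args as filled in by the KVM_S390_GET_SKEYS sketch
 *	if (ioctl(vm_fd, KVM_S390_SET_SKEYS, &args) < 0)
 *		perror("KVM_S390_SET_SKEYS");
 */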
1931
1932 /*
1933  * Base address and length must be sent at the start of each block; therefore
1934  * it's cheaper to send some clean data, as long as it's less than the size of
1935  * two longs.
1936  */
1937 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1938 /* for consistency */
1939 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
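/*
 * Worked example (illustrative): on a 64-bit kernel, sizeof(void *) == 8,
 * so KVM_S390_MAX_BIT_DISTANCE is 16. A run of up to 16 clean pages
 * between two dirty pages is therefore sent inline, since starting a
 * fresh block would cost the same two longs (base address + length) in
 * header data.
 */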
1940
1941 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1942                               u8 *res, unsigned long bufsize)
1943 {
1944         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1945
1946         args->count = 0;
1947         while (args->count < bufsize) {
1948                 hva = gfn_to_hva(kvm, cur_gfn);
1949                 /*
1950                  * We return an error if the first value was invalid, but we
1951                  * return successfully if at least one value was copied.
1952                  */
1953                 if (kvm_is_error_hva(hva))
1954                         return args->count ? 0 : -EFAULT;
1955                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1956                         pgstev = 0;
1957                 res[args->count++] = (pgstev >> 24) & 0x43;
1958                 cur_gfn++;
1959         }
1960
1961         return 0;
1962 }
1963
1964 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1965                                                      gfn_t gfn)
1966 {
1967         return ____gfn_to_memslot(slots, gfn, true);
1968 }
1969
1970 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1971                                               unsigned long cur_gfn)
1972 {
1973         struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1974         unsigned long ofs = cur_gfn - ms->base_gfn;
1975         struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
1976
1977         if (ms->base_gfn + ms->npages <= cur_gfn) {
1978                 mnode = rb_next(mnode);
1979                 /* If we are above the highest slot, wrap around */
1980                 if (!mnode)
1981                         mnode = rb_first(&slots->gfn_tree);
1982
1983                 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1984                 ofs = 0;
1985         }
1986         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1987         while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
1988                 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1989                 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
1990         }
1991         return ms->base_gfn + ofs;
1992 }
1993
1994 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1995                              u8 *res, unsigned long bufsize)
1996 {
1997         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1998         struct kvm_memslots *slots = kvm_memslots(kvm);
1999         struct kvm_memory_slot *ms;
2000
2001         if (unlikely(kvm_memslots_empty(slots)))
2002                 return 0;
2003
2004         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2005         ms = gfn_to_memslot(kvm, cur_gfn);
2006         args->count = 0;
2007         args->start_gfn = cur_gfn;
2008         if (!ms)
2009                 return 0;
2010         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2011         mem_end = kvm_s390_get_gfn_end(slots);
2012
2013         while (args->count < bufsize) {
2014                 hva = gfn_to_hva(kvm, cur_gfn);
2015                 if (kvm_is_error_hva(hva))
2016                         return 0;
2017                 /* Decrement only if we actually flipped the bit to 0 */
2018                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2019                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2020                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2021                         pgstev = 0;
2022                 /* Save the value */
2023                 res[args->count++] = (pgstev >> 24) & 0x43;
2024                 /* If the next bit is too far away, stop. */
2025                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2026                         return 0;
2027                 /* If we reached the previous "next", find the next one */
2028                 if (cur_gfn == next_gfn)
2029                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2030                 /* Reached the end of memory or of the buffer, stop */
2031                 if ((next_gfn >= mem_end) ||
2032                     (next_gfn - args->start_gfn >= bufsize))
2033                         return 0;
2034                 cur_gfn++;
2035                 /* Reached the end of the current memslot, take the next one. */
2036                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2037                         ms = gfn_to_memslot(kvm, cur_gfn);
2038                         if (!ms)
2039                                 return 0;
2040                 }
2041         }
2042         return 0;
2043 }
2044
2045 /*
2046  * This function searches for the next page with dirty CMMA attributes, and
2047  * saves the attributes in the buffer, up to either the end of the buffer
2048  * or the first block of at least KVM_S390_MAX_BIT_DISTANCE clean bits;
2049  * no trailing clean bytes are saved.
2050  * If no dirty bits were found, or if CMMA was not enabled or used, the
2051  * output buffer will indicate a length of 0.
2052  */
2053 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2054                                   struct kvm_s390_cmma_log *args)
2055 {
2056         unsigned long bufsize;
2057         int srcu_idx, peek, ret;
2058         u8 *values;
2059
2060         if (!kvm->arch.use_cmma)
2061                 return -ENXIO;
2062         /* Invalid/unsupported flags were specified */
2063         if (args->flags & ~KVM_S390_CMMA_PEEK)
2064                 return -EINVAL;
2065         /* Migration mode query, and we are not doing a migration */
2066         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2067         if (!peek && !kvm->arch.migration_mode)
2068                 return -EINVAL;
2069         /* CMMA is disabled or was not used, or the buffer has length zero */
2070         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2071         if (!bufsize || !kvm->mm->context.uses_cmm) {
2072                 memset(args, 0, sizeof(*args));
2073                 return 0;
2074         }
2075         /* We are not peeking, and there are no dirty pages */
2076         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2077                 memset(args, 0, sizeof(*args));
2078                 return 0;
2079         }
2080
2081         values = vmalloc(bufsize);
2082         if (!values)
2083                 return -ENOMEM;
2084
2085         mmap_read_lock(kvm->mm);
2086         srcu_idx = srcu_read_lock(&kvm->srcu);
2087         if (peek)
2088                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2089         else
2090                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2091         srcu_read_unlock(&kvm->srcu, srcu_idx);
2092         mmap_read_unlock(kvm->mm);
2093
2094         if (kvm->arch.migration_mode)
2095                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2096         else
2097                 args->remaining = 0;
2098
2099         if (copy_to_user((void __user *)args->values, values, args->count))
2100                 ret = -EFAULT;
2101
2102         vfree(values);
2103         return ret;
2104 }
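/*
 * Illustrative userspace sketch (an assumption): peeking at the CMMA
 * values of the first guest pages. KVM_S390_CMMA_PEEK reads the current
 * values without requiring migration mode; "vm_fd" is a hypothetical VM
 * file descriptor.
 *
 *	uint8_t buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.flags = KVM_S390_CMMA_PEEK,
 *		.values = (__u64)(uintptr_t)buf,
 *	};
 *	if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) == 0)
 *		printf("%u values starting at gfn %llu\n",
 *		       log.count, (unsigned long long)log.start_gfn);
 */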
2105
2106 /*
2107  * This function sets the CMMA attributes for the given pages. If the input
2108  * buffer has zero length, no action is taken, otherwise the attributes are
2109  * set and the mm->context.uses_cmm flag is set.
2110  */
2111 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2112                                   const struct kvm_s390_cmma_log *args)
2113 {
2114         unsigned long hva, mask, pgstev, i;
2115         uint8_t *bits;
2116         int srcu_idx, r = 0;
2117
2118         mask = args->mask;
2119
2120         if (!kvm->arch.use_cmma)
2121                 return -ENXIO;
2122         /* invalid/unsupported flags */
2123         if (args->flags != 0)
2124                 return -EINVAL;
2125         /* Enforce sane limit on memory allocation */
2126         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2127                 return -EINVAL;
2128         /* Nothing to do */
2129         if (args->count == 0)
2130                 return 0;
2131
2132         bits = vmalloc(array_size(sizeof(*bits), args->count));
2133         if (!bits)
2134                 return -ENOMEM;
2135
2136         r = copy_from_user(bits, (void __user *)args->values, args->count);
2137         if (r) {
2138                 r = -EFAULT;
2139                 goto out;
2140         }
2141
2142         mmap_read_lock(kvm->mm);
2143         srcu_idx = srcu_read_lock(&kvm->srcu);
2144         for (i = 0; i < args->count; i++) {
2145                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2146                 if (kvm_is_error_hva(hva)) {
2147                         r = -EFAULT;
2148                         break;
2149                 }
2150
2151                 pgstev = bits[i];
2152                 pgstev = pgstev << 24;
2153                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2154                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2155         }
2156         srcu_read_unlock(&kvm->srcu, srcu_idx);
2157         mmap_read_unlock(kvm->mm);
2158
2159         if (!kvm->mm->context.uses_cmm) {
2160                 mmap_write_lock(kvm->mm);
2161                 kvm->mm->context.uses_cmm = 1;
2162                 mmap_write_unlock(kvm->mm);
2163         }
2164 out:
2165         vfree(bits);
2166         return r;
2167 }
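/*
 * Illustrative counterpart sketch (an assumption): restoring saved CMMA
 * values, e.g. on the destination side of a migration. The mask selects
 * which PGSTE bits to set; a mask of ~0ULL simply lets the kernel-side
 * mask (_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT) above take effect.
 *
 *	log.flags = 0;
 *	log.mask = ~0ULL;
 *	if (ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log) < 0)
 *		perror("KVM_S390_SET_CMMA_BITS");
 */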
2168
2169 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2170 {
2171         struct kvm_vcpu *vcpu;
2172         u16 rc, rrc;
2173         int ret = 0;
2174         unsigned long i;
2175
2176         /*
2177          * We ignore failures and try to destroy as many CPUs as possible.
2178          * At the same time we must not free the assigned resources when
2179          * this fails, as the ultravisor still has access to that memory.
2180          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2181          * behind.
2182          * We want to return the first failure rc and rrc, though.
2183          */
2184         kvm_for_each_vcpu(i, vcpu, kvm) {
2185                 mutex_lock(&vcpu->mutex);
2186                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2187                         *rcp = rc;
2188                         *rrcp = rrc;
2189                         ret = -EIO;
2190                 }
2191                 mutex_unlock(&vcpu->mutex);
2192         }
2193         /* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
2194         if (use_gisa)
2195                 kvm_s390_gisa_enable(kvm);
2196         return ret;
2197 }
2198
2199 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2200 {
2201         unsigned long i;
2202         int r = 0;
2203         u16 dummy;
2204
2205         struct kvm_vcpu *vcpu;
2206
2207         /* Disable the GISA if the ultravisor does not support AIV. */
2208         if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
2209                 kvm_s390_gisa_disable(kvm);
2210
2211         kvm_for_each_vcpu(i, vcpu, kvm) {
2212                 mutex_lock(&vcpu->mutex);
2213                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2214                 mutex_unlock(&vcpu->mutex);
2215                 if (r)
2216                         break;
2217         }
2218         if (r)
2219                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2220         return r;
2221 }
2222
2223 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 {
2225         int r = 0;
2226         u16 dummy;
2227         void __user *argp = (void __user *)cmd->data;
2228
2229         switch (cmd->cmd) {
2230         case KVM_PV_ENABLE: {
2231                 r = -EINVAL;
2232                 if (kvm_s390_pv_is_protected(kvm))
2233                         break;
2234
2235                 /*
2236                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2237                  *  esca, we need no cleanup in the error cases below.
2238                  */
2239                 r = sca_switch_to_extended(kvm);
2240                 if (r)
2241                         break;
2242
2243                 mmap_write_lock(current->mm);
2244                 r = gmap_mark_unmergeable();
2245                 mmap_write_unlock(current->mm);
2246                 if (r)
2247                         break;
2248
2249                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250                 if (r)
2251                         break;
2252
2253                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2254                 if (r)
2255                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2256
2257                 /* we need to block service interrupts from now on */
2258                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2259                 break;
2260         }
2261         case KVM_PV_DISABLE: {
2262                 r = -EINVAL;
2263                 if (!kvm_s390_pv_is_protected(kvm))
2264                         break;
2265
2266                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2267                 /*
2268                  * If a CPU could not be destroyed, destroy VM will also fail.
2269                  * There is no point in trying to destroy it. Instead return
2270                  * the rc and rrc from the first CPU that failed destroying.
2271                  */
2272                 if (r)
2273                         break;
2274                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2275
2276                 /* no need to block service interrupts any more */
2277                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2278                 break;
2279         }
2280         case KVM_PV_SET_SEC_PARMS: {
2281                 struct kvm_s390_pv_sec_parm parms = {};
2282                 void *hdr;
2283
2284                 r = -EINVAL;
2285                 if (!kvm_s390_pv_is_protected(kvm))
2286                         break;
2287
2288                 r = -EFAULT;
2289                 if (copy_from_user(&parms, argp, sizeof(parms)))
2290                         break;
2291
2292                 /* Currently restricted to 8KB */
2293                 r = -EINVAL;
2294                 if (parms.length > PAGE_SIZE * 2)
2295                         break;
2296
2297                 r = -ENOMEM;
2298                 hdr = vmalloc(parms.length);
2299                 if (!hdr)
2300                         break;
2301
2302                 r = -EFAULT;
2303                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2304                                     parms.length))
2305                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2306                                                       &cmd->rc, &cmd->rrc);
2307
2308                 vfree(hdr);
2309                 break;
2310         }
2311         case KVM_PV_UNPACK: {
2312                 struct kvm_s390_pv_unp unp = {};
2313
2314                 r = -EINVAL;
2315                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2316                         break;
2317
2318                 r = -EFAULT;
2319                 if (copy_from_user(&unp, argp, sizeof(unp)))
2320                         break;
2321
2322                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2323                                        &cmd->rc, &cmd->rrc);
2324                 break;
2325         }
2326         case KVM_PV_VERIFY: {
2327                 r = -EINVAL;
2328                 if (!kvm_s390_pv_is_protected(kvm))
2329                         break;
2330
2331                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2332                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2333                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2334                              cmd->rrc);
2335                 break;
2336         }
2337         case KVM_PV_PREP_RESET: {
2338                 r = -EINVAL;
2339                 if (!kvm_s390_pv_is_protected(kvm))
2340                         break;
2341
2342                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2343                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2344                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2345                              cmd->rc, cmd->rrc);
2346                 break;
2347         }
2348         case KVM_PV_UNSHARE_ALL: {
2349                 r = -EINVAL;
2350                 if (!kvm_s390_pv_is_protected(kvm))
2351                         break;
2352
2353                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2354                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2355                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2356                              cmd->rc, cmd->rrc);
2357                 break;
2358         }
2359         default:
2360                 r = -ENOTTY;
2361         }
2362         return r;
2363 }
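/*
 * Illustrative userspace sketch (an assumption): converting a VM into a
 * protected one with the KVM_S390_PV_COMMAND ioctl handled above. flags
 * must be zero; rc/rrc carry the ultravisor return codes on failure.
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *	if (ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd) < 0)
 *		fprintf(stderr, "PV enable failed: rc 0x%x rrc 0x%x\n",
 *			cmd.rc, cmd.rrc);
 */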
2364
2365 static bool access_key_invalid(u8 access_key)
2366 {
2367         return access_key > 0xf;
2368 }
2369
2370 static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
2371 {
2372         void __user *uaddr = (void __user *)mop->buf;
2373         u64 supported_flags;
2374         void *tmpbuf = NULL;
2375         int r, srcu_idx;
2376
2377         supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
2378                           | KVM_S390_MEMOP_F_CHECK_ONLY;
2379         if (mop->flags & ~supported_flags || !mop->size)
2380                 return -EINVAL;
2381         if (mop->size > MEM_OP_MAX_SIZE)
2382                 return -E2BIG;
2383         /*
2384          * This is technically a heuristic only; if the kvm->lock is not
2385          * taken, it is not guaranteed that the vm is/remains non-protected.
2386          * This is ok from a kernel perspective, wrongdoing is detected
2387          * on the access, -EFAULT is returned and the vm may crash the
2388          * next time it accesses the memory in question.
2389          * There is no sane use case to do switching and a memop on two
2390          * different CPUs at the same time.
2391          */
2392         if (kvm_s390_pv_get_handle(kvm))
2393                 return -EINVAL;
2394         if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
2395                 if (access_key_invalid(mop->key))
2396                         return -EINVAL;
2397         } else {
2398                 mop->key = 0;
2399         }
2400         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2401                 tmpbuf = vmalloc(mop->size);
2402                 if (!tmpbuf)
2403                         return -ENOMEM;
2404         }
2405
2406         srcu_idx = srcu_read_lock(&kvm->srcu);
2407
2408         if (kvm_is_error_gpa(kvm, mop->gaddr)) {
2409                 r = PGM_ADDRESSING;
2410                 goto out_unlock;
2411         }
2412
2413         switch (mop->op) {
2414         case KVM_S390_MEMOP_ABSOLUTE_READ: {
2415                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2416                         r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
2417                 } else {
2418                         r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2419                                                       mop->size, GACC_FETCH, mop->key);
2420                         if (r == 0) {
2421                                 if (copy_to_user(uaddr, tmpbuf, mop->size))
2422                                         r = -EFAULT;
2423                         }
2424                 }
2425                 break;
2426         }
2427         case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
2428                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2429                         r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
2430                 } else {
2431                         if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2432                                 r = -EFAULT;
2433                                 break;
2434                         }
2435                         r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
2436                                                       mop->size, GACC_STORE, mop->key);
2437                 }
2438                 break;
2439         }
2440         default:
2441                 r = -EINVAL;
2442         }
2443
2444 out_unlock:
2445         srcu_read_unlock(&kvm->srcu, srcu_idx);
2446
2447         vfree(tmpbuf);
2448         return r;
2449 }
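/*
 * Illustrative userspace sketch (an assumption): reading 512 bytes of
 * guest absolute memory with storage-key protection checked under access
 * key 9. A positive return value is a program interruption code such as
 * PGM_ADDRESSING.
 *
 *	uint8_t data[512];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x10000,
 *		.size = sizeof(data),
 *		.op = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf = (__u64)(uintptr_t)data,
 *		.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION,
 *		.key = 9,
 *	};
 *	int r = ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 */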
2450
2451 long kvm_arch_vm_ioctl(struct file *filp,
2452                        unsigned int ioctl, unsigned long arg)
2453 {
2454         struct kvm *kvm = filp->private_data;
2455         void __user *argp = (void __user *)arg;
2456         struct kvm_device_attr attr;
2457         int r;
2458
2459         switch (ioctl) {
2460         case KVM_S390_INTERRUPT: {
2461                 struct kvm_s390_interrupt s390int;
2462
2463                 r = -EFAULT;
2464                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2465                         break;
2466                 r = kvm_s390_inject_vm(kvm, &s390int);
2467                 break;
2468         }
2469         case KVM_CREATE_IRQCHIP: {
2470                 struct kvm_irq_routing_entry routing;
2471
2472                 r = -EINVAL;
2473                 if (kvm->arch.use_irqchip) {
2474                         /* Set up dummy routing. */
2475                         memset(&routing, 0, sizeof(routing));
2476                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2477                 }
2478                 break;
2479         }
2480         case KVM_SET_DEVICE_ATTR: {
2481                 r = -EFAULT;
2482                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2483                         break;
2484                 r = kvm_s390_vm_set_attr(kvm, &attr);
2485                 break;
2486         }
2487         case KVM_GET_DEVICE_ATTR: {
2488                 r = -EFAULT;
2489                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2490                         break;
2491                 r = kvm_s390_vm_get_attr(kvm, &attr);
2492                 break;
2493         }
2494         case KVM_HAS_DEVICE_ATTR: {
2495                 r = -EFAULT;
2496                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2497                         break;
2498                 r = kvm_s390_vm_has_attr(kvm, &attr);
2499                 break;
2500         }
2501         case KVM_S390_GET_SKEYS: {
2502                 struct kvm_s390_skeys args;
2503
2504                 r = -EFAULT;
2505                 if (copy_from_user(&args, argp,
2506                                    sizeof(struct kvm_s390_skeys)))
2507                         break;
2508                 r = kvm_s390_get_skeys(kvm, &args);
2509                 break;
2510         }
2511         case KVM_S390_SET_SKEYS: {
2512                 struct kvm_s390_skeys args;
2513
2514                 r = -EFAULT;
2515                 if (copy_from_user(&args, argp,
2516                                    sizeof(struct kvm_s390_skeys)))
2517                         break;
2518                 r = kvm_s390_set_skeys(kvm, &args);
2519                 break;
2520         }
2521         case KVM_S390_GET_CMMA_BITS: {
2522                 struct kvm_s390_cmma_log args;
2523
2524                 r = -EFAULT;
2525                 if (copy_from_user(&args, argp, sizeof(args)))
2526                         break;
2527                 mutex_lock(&kvm->slots_lock);
2528                 r = kvm_s390_get_cmma_bits(kvm, &args);
2529                 mutex_unlock(&kvm->slots_lock);
2530                 if (!r) {
2531                         r = copy_to_user(argp, &args, sizeof(args));
2532                         if (r)
2533                                 r = -EFAULT;
2534                 }
2535                 break;
2536         }
2537         case KVM_S390_SET_CMMA_BITS: {
2538                 struct kvm_s390_cmma_log args;
2539
2540                 r = -EFAULT;
2541                 if (copy_from_user(&args, argp, sizeof(args)))
2542                         break;
2543                 mutex_lock(&kvm->slots_lock);
2544                 r = kvm_s390_set_cmma_bits(kvm, &args);
2545                 mutex_unlock(&kvm->slots_lock);
2546                 break;
2547         }
2548         case KVM_S390_PV_COMMAND: {
2549                 struct kvm_pv_cmd args;
2550
2551                 /* protvirt means user cpu state */
2552                 kvm_s390_set_user_cpu_state_ctrl(kvm);
2553                 r = 0;
2554                 if (!is_prot_virt_host()) {
2555                         r = -EINVAL;
2556                         break;
2557                 }
2558                 if (copy_from_user(&args, argp, sizeof(args))) {
2559                         r = -EFAULT;
2560                         break;
2561                 }
2562                 if (args.flags) {
2563                         r = -EINVAL;
2564                         break;
2565                 }
2566                 mutex_lock(&kvm->lock);
2567                 r = kvm_s390_handle_pv(kvm, &args);
2568                 mutex_unlock(&kvm->lock);
2569                 if (copy_to_user(argp, &args, sizeof(args))) {
2570                         r = -EFAULT;
2571                         break;
2572                 }
2573                 break;
2574         }
2575         case KVM_S390_MEM_OP: {
2576                 struct kvm_s390_mem_op mem_op;
2577
2578                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2579                         r = kvm_s390_vm_mem_op(kvm, &mem_op);
2580                 else
2581                         r = -EFAULT;
2582                 break;
2583         }
2584         default:
2585                 r = -ENOTTY;
2586         }
2587
2588         return r;
2589 }
2590
2591 static int kvm_s390_apxa_installed(void)
2592 {
2593         struct ap_config_info info;
2594
2595         if (ap_instructions_available()) {
2596                 if (ap_qci(&info) == 0)
2597                         return info.apxa;
2598         }
2599
2600         return 0;
2601 }
2602
2603 /*
2604  * The format of the crypto control block (CRYCB) is specified in the 3
2605  * low-order bits of the CRYCB designation (CRYCBD) field as follows:
2606  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2607  *           AP extended addressing (APXA) facility is installed.
2608  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2609  * Format 2: Both the APXA and MSAX3 facilities are installed.
2610  */
2611 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2612 {
2613         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2614
2615         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2616         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2617
2618         /* Check whether MSAX3 is installed */
2619         if (!test_kvm_facility(kvm, 76))
2620                 return;
2621
2622         if (kvm_s390_apxa_installed())
2623                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2624         else
2625                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2626 }
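/*
 * Worked example (illustrative): with facility 76 (MSAX3) available and
 * APXA installed, the low-order bits of crycbd end up as 0b11
 * (CRYCB_FORMAT2); MSAX3 without APXA yields 0b01 (CRYCB_FORMAT1);
 * without MSAX3 they stay 0b00 (CRYCB_FORMAT0).
 */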
2627
2628 /*
2629  * kvm_arch_crypto_set_masks
2630  *
2631  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2632  *       to be set.
2633  * @apm: the mask identifying the accessible AP adapters
2634  * @aqm: the mask identifying the accessible AP domains
2635  * @adm: the mask identifying the accessible AP control domains
2636  *
2637  * Set the masks that identify the adapters, domains and control domains to
2638  * which the KVM guest is granted access.
2639  *
2640  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2641  *       function.
2642  */
2643 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2644                                unsigned long *aqm, unsigned long *adm)
2645 {
2646         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2647
2648         kvm_s390_vcpu_block_all(kvm);
2649
2650         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2651         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2652                 memcpy(crycb->apcb1.apm, apm, 32);
2653                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2654                          apm[0], apm[1], apm[2], apm[3]);
2655                 memcpy(crycb->apcb1.aqm, aqm, 32);
2656                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2657                          aqm[0], aqm[1], aqm[2], aqm[3]);
2658                 memcpy(crycb->apcb1.adm, adm, 32);
2659                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2660                          adm[0], adm[1], adm[2], adm[3]);
2661                 break;
2662         case CRYCB_FORMAT1:
2663         case CRYCB_FORMAT0: /* Fall through: both use APCB0 */
2664                 memcpy(crycb->apcb0.apm, apm, 8);
2665                 memcpy(crycb->apcb0.aqm, aqm, 2);
2666                 memcpy(crycb->apcb0.adm, adm, 2);
2667                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2668                          apm[0], *((unsigned short *)aqm),
2669                          *((unsigned short *)adm));
2670                 break;
2671         default:        /* Cannot happen */
2672                 break;
2673         }
2674
2675         /* recreate the shadow crycb for each vcpu */
2676         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2677         kvm_s390_vcpu_unblock_all(kvm);
2678 }
2679 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
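/*
 * Illustrative caller sketch (an assumption; in-tree, the vfio_ap driver
 * is the consumer of this interface, and "matrix" here is a hypothetical
 * holder of the three masks). Per the note above, kvm->lock must be held:
 *
 *	mutex_lock(&kvm->lock);
 *	kvm_arch_crypto_set_masks(kvm, matrix->apm, matrix->aqm, matrix->adm);
 *	mutex_unlock(&kvm->lock);
 */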
2680
2681 /**
2682  * kvm_arch_crypto_clear_masks - clear the guest's AP access masks
2683  *
2684  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2685  *       to be cleared.
2686  *
2687  * Clear the masks that identify the adapters, domains and control domains to
2688  * which the KVM guest is granted access.
2689  *
2690  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2691  *       function.
2692  */
2693 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2694 {
2695         kvm_s390_vcpu_block_all(kvm);
2696
2697         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2698                sizeof(kvm->arch.crypto.crycb->apcb0));
2699         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2700                sizeof(kvm->arch.crypto.crycb->apcb1));
2701
2702         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2703         /* recreate the shadow crycb for each vcpu */
2704         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2705         kvm_s390_vcpu_unblock_all(kvm);
2706 }
2707 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2708
2709 static u64 kvm_s390_get_initial_cpuid(void)
2710 {
2711         struct cpuid cpuid;
2712
2713         get_cpu_id(&cpuid);
2714         cpuid.version = 0xff;
2715         return *((u64 *) &cpuid);
2716 }
2717
2718 static void kvm_s390_crypto_init(struct kvm *kvm)
2719 {
2720         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2721         kvm_s390_set_crycb_format(kvm);
2722         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2723
2724         if (!test_kvm_facility(kvm, 76))
2725                 return;
2726
2727         /* Enable AES/DEA protected key functions by default */
2728         kvm->arch.crypto.aes_kw = 1;
2729         kvm->arch.crypto.dea_kw = 1;
2730         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2731                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2732         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2733                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2734 }
2735
2736 static void sca_dispose(struct kvm *kvm)
2737 {
2738         if (kvm->arch.use_esca)
2739                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2740         else
2741                 free_page((unsigned long)(kvm->arch.sca));
2742         kvm->arch.sca = NULL;
2743 }
2744
2745 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2746 {
2747         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2748         int i, rc;
2749         char debug_name[16];
2750         static unsigned long sca_offset;
2751
2752         rc = -EINVAL;
2753 #ifdef CONFIG_KVM_S390_UCONTROL
2754         if (type & ~KVM_VM_S390_UCONTROL)
2755                 goto out_err;
2756         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2757                 goto out_err;
2758 #else
2759         if (type)
2760                 goto out_err;
2761 #endif
2762
2763         rc = s390_enable_sie();
2764         if (rc)
2765                 goto out_err;
2766
2767         rc = -ENOMEM;
2768
2769         if (!sclp.has_64bscao)
2770                 alloc_flags |= GFP_DMA;
2771         rwlock_init(&kvm->arch.sca_lock);
2772         /* start with basic SCA */
2773         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2774         if (!kvm->arch.sca)
2775                 goto out_err;
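	/*
	 * Stagger each VM's basic SCA within its page in 16-byte steps,
	 * presumably to spread the hardware's SCA accesses across cache
	 * lines; sca_offset is static, so consecutive VMs get different
	 * offsets (kvm_lock serializes the update).
	 */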
2776         mutex_lock(&kvm_lock);
2777         sca_offset += 16;
2778         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2779                 sca_offset = 0;
2780         kvm->arch.sca = (struct bsca_block *)
2781                         ((char *) kvm->arch.sca + sca_offset);
2782         mutex_unlock(&kvm_lock);
2783
2784         sprintf(debug_name, "kvm-%u", current->pid);
2785
2786         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2787         if (!kvm->arch.dbf)
2788                 goto out_err;
2789
2790         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2791         kvm->arch.sie_page2 =
2792              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2793         if (!kvm->arch.sie_page2)
2794                 goto out_err;
2795
2796         kvm->arch.sie_page2->kvm = kvm;
2797         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2798
2799         for (i = 0; i < kvm_s390_fac_size(); i++) {
2800                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2801                                               (kvm_s390_fac_base[i] |
2802                                                kvm_s390_fac_ext[i]);
2803                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2804                                               kvm_s390_fac_base[i];
2805         }
2806         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2807
2808         /* we are always in czam mode - even on pre-z14 machines */
2809         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2810         set_kvm_facility(kvm->arch.model.fac_list, 138);
2811         /* we emulate STHYI in kvm */
2812         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2813         set_kvm_facility(kvm->arch.model.fac_list, 74);
2814         if (MACHINE_HAS_TLB_GUEST) {
2815                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2816                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2817         }
2818
2819         if (css_general_characteristics.aiv && test_facility(65))
2820                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2821
2822         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2823         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2824
2825         kvm_s390_crypto_init(kvm);
2826
2827         mutex_init(&kvm->arch.float_int.ais_lock);
2828         spin_lock_init(&kvm->arch.float_int.lock);
2829         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2830                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2831         init_waitqueue_head(&kvm->arch.ipte_wq);
2832         mutex_init(&kvm->arch.ipte_mutex);
2833
2834         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2835         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2836
2837         if (type & KVM_VM_S390_UCONTROL) {
2838                 kvm->arch.gmap = NULL;
2839                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2840         } else {
2841                 if (sclp.hamax == U64_MAX)
2842                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2843                 else
2844                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2845                                                     sclp.hamax + 1);
2846                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2847                 if (!kvm->arch.gmap)
2848                         goto out_err;
2849                 kvm->arch.gmap->private = kvm;
2850                 kvm->arch.gmap->pfault_enabled = 0;
2851         }
2852
2853         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2854         kvm->arch.use_skf = sclp.has_skey;
2855         spin_lock_init(&kvm->arch.start_stop_lock);
2856         kvm_s390_vsie_init(kvm);
2857         if (use_gisa)
2858                 kvm_s390_gisa_init(kvm);
2859         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2860
2861         return 0;
2862 out_err:
2863         free_page((unsigned long)kvm->arch.sie_page2);
2864         debug_unregister(kvm->arch.dbf);
2865         sca_dispose(kvm);
2866         KVM_EVENT(3, "creation of vm failed: %d", rc);
2867         return rc;
2868 }
2869
2870 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2871 {
2872         u16 rc, rrc;
2873
2874         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2875         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2876         kvm_s390_clear_local_irqs(vcpu);
2877         kvm_clear_async_pf_completion_queue(vcpu);
2878         if (!kvm_is_ucontrol(vcpu->kvm))
2879                 sca_del_vcpu(vcpu);
2880
2881         if (kvm_is_ucontrol(vcpu->kvm))
2882                 gmap_remove(vcpu->arch.gmap);
2883
2884         if (vcpu->kvm->arch.use_cmma)
2885                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2886         /* We cannot hold the vcpu mutex here; we are already dying */
2887         if (kvm_s390_pv_cpu_get_handle(vcpu))
2888                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2889         free_page((unsigned long)(vcpu->arch.sie_block));
2890 }
2891
2892 void kvm_arch_destroy_vm(struct kvm *kvm)
2893 {
2894         u16 rc, rrc;
2895
2896         kvm_destroy_vcpus(kvm);
2897         sca_dispose(kvm);
2898         kvm_s390_gisa_destroy(kvm);
2899         /*
2900          * We are already at the end of life and kvm->lock is not taken.
2901          * This is ok as the file descriptor is closed by now and nobody
2902          * can mess with the pv state. To avoid lockdep_assert_held from
2903          * complaining we do not use kvm_s390_pv_is_protected.
2904          */
2905         if (kvm_s390_pv_get_handle(kvm))
2906                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2907         debug_unregister(kvm->arch.dbf);
2908         free_page((unsigned long)kvm->arch.sie_page2);
2909         if (!kvm_is_ucontrol(kvm))
2910                 gmap_remove(kvm->arch.gmap);
2911         kvm_s390_destroy_adapters(kvm);
2912         kvm_s390_clear_float_irqs(kvm);
2913         kvm_s390_vsie_destroy(kvm);
2914         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2915 }
2916
2917 /* Section: vcpu related */
2918 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2919 {
2920         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2921         if (!vcpu->arch.gmap)
2922                 return -ENOMEM;
2923         vcpu->arch.gmap->private = vcpu->kvm;
2924
2925         return 0;
2926 }
2927
2928 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2929 {
2930         if (!kvm_s390_use_sca_entries())
2931                 return;
2932         read_lock(&vcpu->kvm->arch.sca_lock);
2933         if (vcpu->kvm->arch.use_esca) {
2934                 struct esca_block *sca = vcpu->kvm->arch.sca;
2935
2936                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2937                 sca->cpu[vcpu->vcpu_id].sda = 0;
2938         } else {
2939                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2940
2941                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2942                 sca->cpu[vcpu->vcpu_id].sda = 0;
2943         }
2944         read_unlock(&vcpu->kvm->arch.sca_lock);
2945 }
2946
2947 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2948 {
2949         if (!kvm_s390_use_sca_entries()) {
2950                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2951
2952                 /* we still need the basic sca for the ipte control */
2953                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2954                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2955                 return;
2956         }
2957         read_lock(&vcpu->kvm->arch.sca_lock);
2958         if (vcpu->kvm->arch.use_esca) {
2959                 struct esca_block *sca = vcpu->kvm->arch.sca;
2960
2961                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2962                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2963                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2964                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2965                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2966         } else {
2967                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2968
2969                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2970                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2971                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2972                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2973         }
2974         read_unlock(&vcpu->kvm->arch.sca_lock);
2975 }
2976
2977 /* Basic SCA to Extended SCA data copy routines */
2978 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2979 {
2980         d->sda = s->sda;
2981         d->sigp_ctrl.c = s->sigp_ctrl.c;
2982         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2983 }
2984
2985 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2986 {
2987         int i;
2988
2989         d->ipte_control = s->ipte_control;
2990         d->mcn[0] = s->mcn;
2991         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2992                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2993 }
2994
2995 static int sca_switch_to_extended(struct kvm *kvm)
2996 {
2997         struct bsca_block *old_sca = kvm->arch.sca;
2998         struct esca_block *new_sca;
2999         struct kvm_vcpu *vcpu;
3000         unsigned long vcpu_idx;
3001         u32 scaol, scaoh;
3002
3003         if (kvm->arch.use_esca)
3004                 return 0;
3005
3006         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
3007         if (!new_sca)
3008                 return -ENOMEM;
3009
3010         scaoh = (u32)((u64)(new_sca) >> 32);
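	/*
	 * Split the 64-bit ESCA origin into the high/low halves expected by
	 * the SIE block; the ESCA is 64-byte aligned, so the low 6 bits of
	 * the origin are masked off.
	 */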
3011         scaol = (u32)(u64)(new_sca) & ~0x3fU;
3012
3013         kvm_s390_vcpu_block_all(kvm);
3014         write_lock(&kvm->arch.sca_lock);
3015
3016         sca_copy_b_to_e(new_sca, old_sca);
3017
3018         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
3019                 vcpu->arch.sie_block->scaoh = scaoh;
3020                 vcpu->arch.sie_block->scaol = scaol;
3021                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
3022         }
3023         kvm->arch.sca = new_sca;
3024         kvm->arch.use_esca = 1;
3025
3026         write_unlock(&kvm->arch.sca_lock);
3027         kvm_s390_vcpu_unblock_all(kvm);
3028
3029         free_page((unsigned long)old_sca);
3030
3031         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
3032                  old_sca, kvm->arch.sca);
3033         return 0;
3034 }
3035
3036 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
3037 {
3038         int rc;
3039
3040         if (!kvm_s390_use_sca_entries()) {
3041                 if (id < KVM_MAX_VCPUS)
3042                         return true;
3043                 return false;
3044         }
3045         if (id < KVM_S390_BSCA_CPU_SLOTS)
3046                 return true;
3047         if (!sclp.has_esca || !sclp.has_64bscao)
3048                 return false;
3049
3050         mutex_lock(&kvm->lock);
3051         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3052         mutex_unlock(&kvm->lock);
3053
3054         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3055 }
3056
3057 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3058 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3059 {
3060         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3061         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3062         vcpu->arch.cputm_start = get_tod_clock_fast();
3063         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3064 }
3065
3066 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3067 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3068 {
3069         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3070         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3071         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3072         vcpu->arch.cputm_start = 0;
3073         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3074 }
3075
3076 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3077 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3078 {
3079         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3080         vcpu->arch.cputm_enabled = true;
3081         __start_cpu_timer_accounting(vcpu);
3082 }
3083
3084 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3085 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3086 {
3087         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3088         __stop_cpu_timer_accounting(vcpu);
3089         vcpu->arch.cputm_enabled = false;
3090 }
3091
3092 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3093 {
3094         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3095         __enable_cpu_timer_accounting(vcpu);
3096         preempt_enable();
3097 }
3098
3099 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3100 {
3101         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3102         __disable_cpu_timer_accounting(vcpu);
3103         preempt_enable();
3104 }
3105
3106 /* set the cpu timer - may only be called from the VCPU thread itself */
3107 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3108 {
3109         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3110         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3111         if (vcpu->arch.cputm_enabled)
3112                 vcpu->arch.cputm_start = get_tod_clock_fast();
3113         vcpu->arch.sie_block->cputm = cputm;
3114         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3115         preempt_enable();
3116 }
3117
3118 /* update and get the cpu timer - can also be called from other VCPU threads */
3119 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3120 {
3121         unsigned int seq;
3122         __u64 value;
3123
3124         if (unlikely(!vcpu->arch.cputm_enabled))
3125                 return vcpu->arch.sie_block->cputm;
3126
3127         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3128         do {
3129                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3130                 /*
3131                  * If the writer were ever to execute a read in the critical
3132                  * section, e.g. in irq context, we would have a deadlock.
3133                  */
3134                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3135                 value = vcpu->arch.sie_block->cputm;
3136                 /* if cputm_start is 0, accounting is being started/stopped */
3137                 if (likely(vcpu->arch.cputm_start))
3138                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3139         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3140         preempt_enable();
3141         return value;
3142 }
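
/*
 * Illustrative sketch: per the rules above, only the VCPU thread may set
 * the timer, but any thread may read it concurrently. A hypothetical
 * debug helper is therefore safe without extra locking:
 */
static inline void example_trace_cpu_timer(struct kvm_vcpu *vcpu)
{
	/* retries internally if the VCPU thread is updating the timer */
	VCPU_EVENT(vcpu, 3, "cpu timer: %llx", kvm_s390_get_cpu_timer(vcpu));
}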
3143
3144 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3145 {
3147         gmap_enable(vcpu->arch.enabled_gmap);
3148         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3149         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3150                 __start_cpu_timer_accounting(vcpu);
3151         vcpu->cpu = cpu;
3152 }
3153
3154 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3155 {
3156         vcpu->cpu = -1;
3157         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3158                 __stop_cpu_timer_accounting(vcpu);
3159         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3160         vcpu->arch.enabled_gmap = gmap_get_enabled();
3161         gmap_disable(vcpu->arch.enabled_gmap);
3163 }
3164
3165 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3166 {
3167         mutex_lock(&vcpu->kvm->lock);
3168         preempt_disable();
3169         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3170         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3171         preempt_enable();
3172         mutex_unlock(&vcpu->kvm->lock);
3173         if (!kvm_is_ucontrol(vcpu->kvm)) {
3174                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3175                 sca_add_vcpu(vcpu);
3176         }
3177         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3178                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3179         /* make vcpu_load load the right gmap on the first trigger */
3180         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3181 }
3182
3183 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3184 {
3185         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3186             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3187                 return true;
3188         return false;
3189 }
3190
3191 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3192 {
3193         /* At least one ECC subfunction must be present */
3194         return kvm_has_pckmo_subfunc(kvm, 32) ||
3195                kvm_has_pckmo_subfunc(kvm, 33) ||
3196                kvm_has_pckmo_subfunc(kvm, 34) ||
3197                kvm_has_pckmo_subfunc(kvm, 40) ||
3198                kvm_has_pckmo_subfunc(kvm, 41);
3200 }
3201
3202 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3203 {
3204         /*
3205          * If the AP instructions are not being interpreted and the MSAX3
3206          * facility is not configured for the guest, there is nothing to set up.
3207          */
3208         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3209                 return;
3210
3211         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3212         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3213         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3214         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3215
3216         if (vcpu->kvm->arch.crypto.apie)
3217                 vcpu->arch.sie_block->eca |= ECA_APIE;
3218
3219         /* Set up protected key support */
3220         if (vcpu->kvm->arch.crypto.aes_kw) {
3221                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3222                 /* ecc is also wrapped with AES key */
3223                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3224                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3225         }
3226
3227         if (vcpu->kvm->arch.crypto.dea_kw)
3228                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3229 }
3230
3231 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3232 {
3233         free_page(vcpu->arch.sie_block->cbrlo);
3234         vcpu->arch.sie_block->cbrlo = 0;
3235 }
3236
3237 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3238 {
3239         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3240         if (!vcpu->arch.sie_block->cbrlo)
3241                 return -ENOMEM;
3242         return 0;
3243 }
3244
3245 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3246 {
3247         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3248
3249         vcpu->arch.sie_block->ibc = model->ibc;
3250         if (test_kvm_facility(vcpu->kvm, 7))
3251                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3252 }
3253
3254 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3255 {
3256         int rc = 0;
3257         u16 uvrc, uvrrc;
3258
3259         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3260                                                     CPUSTAT_SM |
3261                                                     CPUSTAT_STOPPED);
3262
3263         if (test_kvm_facility(vcpu->kvm, 78))
3264                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3265         else if (test_kvm_facility(vcpu->kvm, 8))
3266                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3267
3268         kvm_s390_vcpu_setup_model(vcpu);
3269
3270         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3271         if (MACHINE_HAS_ESOP)
3272                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3273         if (test_kvm_facility(vcpu->kvm, 9))
3274                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3275         if (test_kvm_facility(vcpu->kvm, 73))
3276                 vcpu->arch.sie_block->ecb |= ECB_TE;
3277         if (!kvm_is_ucontrol(vcpu->kvm))
3278                 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3279
3280         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3281                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3282         if (test_kvm_facility(vcpu->kvm, 130))
3283                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3284         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3285         if (sclp.has_cei)
3286                 vcpu->arch.sie_block->eca |= ECA_CEI;
3287         if (sclp.has_ib)
3288                 vcpu->arch.sie_block->eca |= ECA_IB;
3289         if (sclp.has_siif)
3290                 vcpu->arch.sie_block->eca |= ECA_SII;
3291         if (sclp.has_sigpif)
3292                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3293         if (test_kvm_facility(vcpu->kvm, 129)) {
3294                 vcpu->arch.sie_block->eca |= ECA_VX;
3295                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3296         }
3297         if (test_kvm_facility(vcpu->kvm, 139))
3298                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3299         if (test_kvm_facility(vcpu->kvm, 156))
3300                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3301         if (vcpu->arch.sie_block->gd) {
3302                 vcpu->arch.sie_block->eca |= ECA_AIV;
3303                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3304                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3305         }
3306         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3307                                         | SDNXC;
3308         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3309
3310         if (sclp.has_kss)
3311                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3312         else
3313                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3314
3315         if (vcpu->kvm->arch.use_cmma) {
3316                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3317                 if (rc)
3318                         return rc;
3319         }
3320         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3321         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3322
3323         vcpu->arch.sie_block->hpid = HPID_KVM;
3324
3325         kvm_s390_vcpu_crypto_setup(vcpu);
3326
3327         mutex_lock(&vcpu->kvm->lock);
3328         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3329                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3330                 if (rc)
3331                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3332         }
3333         mutex_unlock(&vcpu->kvm->lock);
3334
3335         return rc;
3336 }
3337
3338 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3339 {
3340         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3341                 return -EINVAL;
3342         return 0;
3343 }
3344
3345 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3346 {
3347         struct sie_page *sie_page;
3348         int rc;
3349
3350         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3351         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3352         if (!sie_page)
3353                 return -ENOMEM;
3354
3355         vcpu->arch.sie_block = &sie_page->sie_block;
3356         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3357
3358         /* the real guest size will always be smaller than msl */
3359         vcpu->arch.sie_block->mso = 0;
3360         vcpu->arch.sie_block->msl = sclp.hamax;
3361
3362         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3363         spin_lock_init(&vcpu->arch.local_int.lock);
3364         vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
3365         seqcount_init(&vcpu->arch.cputm_seqcount);
3366
3367         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3368         kvm_clear_async_pf_completion_queue(vcpu);
3369         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3370                                     KVM_SYNC_GPRS |
3371                                     KVM_SYNC_ACRS |
3372                                     KVM_SYNC_CRS |
3373                                     KVM_SYNC_ARCH0 |
3374                                     KVM_SYNC_PFAULT |
3375                                     KVM_SYNC_DIAG318;
3376         kvm_s390_set_prefix(vcpu, 0);
3377         if (test_kvm_facility(vcpu->kvm, 64))
3378                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3379         if (test_kvm_facility(vcpu->kvm, 82))
3380                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3381         if (test_kvm_facility(vcpu->kvm, 133))
3382                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3383         if (test_kvm_facility(vcpu->kvm, 156))
3384                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3385         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3386          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3387          */
3388         if (MACHINE_HAS_VX)
3389                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3390         else
3391                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3392
3393         if (kvm_is_ucontrol(vcpu->kvm)) {
3394                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3395                 if (rc)
3396                         goto out_free_sie_block;
3397         }
3398
3399         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3400                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3401         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3402
3403         rc = kvm_s390_vcpu_setup(vcpu);
3404         if (rc)
3405                 goto out_ucontrol_uninit;
3406         return 0;
3407
3408 out_ucontrol_uninit:
3409         if (kvm_is_ucontrol(vcpu->kvm))
3410                 gmap_remove(vcpu->arch.gmap);
3411 out_free_sie_block:
3412         free_page((unsigned long)(vcpu->arch.sie_block));
3413         return rc;
3414 }
3415
3416 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3417 {
3418         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3419         return kvm_s390_vcpu_has_irq(vcpu, 0);
3420 }
3421
3422 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3423 {
3424         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3425 }
3426
3427 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3428 {
3429         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3430         exit_sie(vcpu);
3431 }
3432
3433 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3434 {
3435         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3436 }
3437
3438 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3439 {
3440         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3441         exit_sie(vcpu);
3442 }
3443
3444 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3445 {
3446         return atomic_read(&vcpu->arch.sie_block->prog20) &
3447                (PROG_BLOCK_SIE | PROG_REQUEST);
3448 }
3449
3450 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3451 {
3452         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3453 }
3454
3455 /*
3456  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3457  * If the CPU is not running (e.g. waiting as idle) the function will
3458  * return immediately.
 */
3459 void exit_sie(struct kvm_vcpu *vcpu)
3460 {
3461         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3462         kvm_s390_vsie_kick(vcpu);
3463         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3464                 cpu_relax();
3465 }
3466
3467 /* Kick a guest cpu out of SIE to process a request synchronously */
3468 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3469 {
3470         __kvm_make_request(req, vcpu);
3471         kvm_s390_vcpu_request(vcpu);
3472 }
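
/*
 * Illustrative sketch: code that must have a per-VCPU change observed
 * before the guest reenters (v)SIE uses the synchronous request above;
 * the request name is real, the wrapper is hypothetical:
 */
static inline void example_force_vsie_restart(struct kvm_vcpu *vcpu)
{
	/* kicks the VCPU out of (v)SIE; handled before the next entry */
	kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
}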
3473
3474 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3475                               unsigned long end)
3476 {
3477         struct kvm *kvm = gmap->private;
3478         struct kvm_vcpu *vcpu;
3479         unsigned long prefix;
3480         unsigned long i;
3481
3482         if (gmap_is_shadow(gmap))
3483                 return;
3484         if (start >= 1UL << 31)
3485                 /* We are only interested in prefix pages */
3486                 return;
3487         kvm_for_each_vcpu(i, vcpu, kvm) {
3488                 /* match against both prefix pages */
3489                 prefix = kvm_s390_get_prefix(vcpu);
3490                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3491                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3492                                    start, end);
3493                         kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3494                 }
3495         }
3496 }
3497
3498 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3499 {
3500         /* do not poll with more than halt_poll_max_steal percent of steal time */
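	/*
	 * avg_steal_timer is in TOD clock units; TICK_USEC << 12 is one
	 * timer tick in TOD units (1 microsecond = 4096 TOD units), so the
	 * quotient is the steal time as a percentage of a tick.
	 */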
3501         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3502             READ_ONCE(halt_poll_max_steal)) {
3503                 vcpu->stat.halt_no_poll_steal++;
3504                 return true;
3505         }
3506         return false;
3507 }
3508
3509 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3510 {
3511         /* kvm common code refers to this, but never calls it */
3512         BUG();
3513         return 0;
3514 }
3515
3516 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3517                                            struct kvm_one_reg *reg)
3518 {
3519         int r = -EINVAL;
3520
3521         switch (reg->id) {
3522         case KVM_REG_S390_TODPR:
3523                 r = put_user(vcpu->arch.sie_block->todpr,
3524                              (u32 __user *)reg->addr);
3525                 break;
3526         case KVM_REG_S390_EPOCHDIFF:
3527                 r = put_user(vcpu->arch.sie_block->epoch,
3528                              (u64 __user *)reg->addr);
3529                 break;
3530         case KVM_REG_S390_CPU_TIMER:
3531                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3532                              (u64 __user *)reg->addr);
3533                 break;
3534         case KVM_REG_S390_CLOCK_COMP:
3535                 r = put_user(vcpu->arch.sie_block->ckc,
3536                              (u64 __user *)reg->addr);
3537                 break;
3538         case KVM_REG_S390_PFTOKEN:
3539                 r = put_user(vcpu->arch.pfault_token,
3540                              (u64 __user *)reg->addr);
3541                 break;
3542         case KVM_REG_S390_PFCOMPARE:
3543                 r = put_user(vcpu->arch.pfault_compare,
3544                              (u64 __user *)reg->addr);
3545                 break;
3546         case KVM_REG_S390_PFSELECT:
3547                 r = put_user(vcpu->arch.pfault_select,
3548                              (u64 __user *)reg->addr);
3549                 break;
3550         case KVM_REG_S390_PP:
3551                 r = put_user(vcpu->arch.sie_block->pp,
3552                              (u64 __user *)reg->addr);
3553                 break;
3554         case KVM_REG_S390_GBEA:
3555                 r = put_user(vcpu->arch.sie_block->gbea,
3556                              (u64 __user *)reg->addr);
3557                 break;
3558         default:
3559                 break;
3560         }
3561
3562         return r;
3563 }
3564
3565 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3566                                            struct kvm_one_reg *reg)
3567 {
3568         int r = -EINVAL;
3569         __u64 val;
3570
3571         switch (reg->id) {
3572         case KVM_REG_S390_TODPR:
3573                 r = get_user(vcpu->arch.sie_block->todpr,
3574                              (u32 __user *)reg->addr);
3575                 break;
3576         case KVM_REG_S390_EPOCHDIFF:
3577                 r = get_user(vcpu->arch.sie_block->epoch,
3578                              (u64 __user *)reg->addr);
3579                 break;
3580         case KVM_REG_S390_CPU_TIMER:
3581                 r = get_user(val, (u64 __user *)reg->addr);
3582                 if (!r)
3583                         kvm_s390_set_cpu_timer(vcpu, val);
3584                 break;
3585         case KVM_REG_S390_CLOCK_COMP:
3586                 r = get_user(vcpu->arch.sie_block->ckc,
3587                              (u64 __user *)reg->addr);
3588                 break;
3589         case KVM_REG_S390_PFTOKEN:
3590                 r = get_user(vcpu->arch.pfault_token,
3591                              (u64 __user *)reg->addr);
3592                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3593                         kvm_clear_async_pf_completion_queue(vcpu);
3594                 break;
3595         case KVM_REG_S390_PFCOMPARE:
3596                 r = get_user(vcpu->arch.pfault_compare,
3597                              (u64 __user *)reg->addr);
3598                 break;
3599         case KVM_REG_S390_PFSELECT:
3600                 r = get_user(vcpu->arch.pfault_select,
3601                              (u64 __user *)reg->addr);
3602                 break;
3603         case KVM_REG_S390_PP:
3604                 r = get_user(vcpu->arch.sie_block->pp,
3605                              (u64 __user *)reg->addr);
3606                 break;
3607         case KVM_REG_S390_GBEA:
3608                 r = get_user(vcpu->arch.sie_block->gbea,
3609                              (u64 __user *)reg->addr);
3610                 break;
3611         default:
3612                 break;
3613         }
3614
3615         return r;
3616 }
3617
3618 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3619 {
3620         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3621         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3622         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3623
3624         kvm_clear_async_pf_completion_queue(vcpu);
3625         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3626                 kvm_s390_vcpu_stop(vcpu);
3627         kvm_s390_clear_local_irqs(vcpu);
3628 }
3629
3630 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3631 {
3632         /* Initial reset is a superset of the normal reset */
3633         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3634
3635         /*
3636          * This equals the initial cpu reset in the POP, but we don't
3637          * switch to ESA. We not only reset the internal data, but also ...
3638          */
3639         vcpu->arch.sie_block->gpsw.mask = 0;
3640         vcpu->arch.sie_block->gpsw.addr = 0;
3641         kvm_s390_set_prefix(vcpu, 0);
3642         kvm_s390_set_cpu_timer(vcpu, 0);
3643         vcpu->arch.sie_block->ckc = 0;
3644         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3645         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3646         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3647
3648         /* ... the data in sync regs */
3649         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3650         vcpu->run->s.regs.ckc = 0;
3651         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3652         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3653         vcpu->run->psw_addr = 0;
3654         vcpu->run->psw_mask = 0;
3655         vcpu->run->s.regs.todpr = 0;
3656         vcpu->run->s.regs.cputm = 0;
3657         vcpu->run->s.regs.ckc = 0;
3658         vcpu->run->s.regs.pp = 0;
3659         vcpu->run->s.regs.gbea = 1;
3660         vcpu->run->s.regs.fpc = 0;
3661         /*
3662          * Do not reset these registers in the protected case, as some of
3663          * them are overlaid and they are not accessible in this case
3664          * anyway.
3665          */
3666         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3667                 vcpu->arch.sie_block->gbea = 1;
3668                 vcpu->arch.sie_block->pp = 0;
3669                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3670                 vcpu->arch.sie_block->todpr = 0;
3671         }
3672 }
3673
3674 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3675 {
3676         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3677
3678         /* Clear reset is a superset of the initial reset */
3679         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3680
3681         memset(&regs->gprs, 0, sizeof(regs->gprs));
3682         memset(&regs->vrs, 0, sizeof(regs->vrs));
3683         memset(&regs->acrs, 0, sizeof(regs->acrs));
3684         memset(&regs->gscb, 0, sizeof(regs->gscb));
3685
3686         regs->etoken = 0;
3687         regs->etoken_extension = 0;
3688 }
3689
3690 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3691 {
3692         vcpu_load(vcpu);
3693         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3694         vcpu_put(vcpu);
3695         return 0;
3696 }
3697
3698 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3699 {
3700         vcpu_load(vcpu);
3701         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3702         vcpu_put(vcpu);
3703         return 0;
3704 }
3705
3706 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3707                                   struct kvm_sregs *sregs)
3708 {
3709         vcpu_load(vcpu);
3710
3711         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3712         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3713
3714         vcpu_put(vcpu);
3715         return 0;
3716 }
3717
3718 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3719                                   struct kvm_sregs *sregs)
3720 {
3721         vcpu_load(vcpu);
3722
3723         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3724         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3725
3726         vcpu_put(vcpu);
3727         return 0;
3728 }
3729
3730 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3731 {
3732         int ret = 0;
3733
3734         vcpu_load(vcpu);
3735
3736         if (test_fp_ctl(fpu->fpc)) {
3737                 ret = -EINVAL;
3738                 goto out;
3739         }
3740         vcpu->run->s.regs.fpc = fpu->fpc;
3741         if (MACHINE_HAS_VX)
3742                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3743                                  (freg_t *) fpu->fprs);
3744         else
3745                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3746
3747 out:
3748         vcpu_put(vcpu);
3749         return ret;
3750 }
3751
3752 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3753 {
3754         vcpu_load(vcpu);
3755
3756         /* make sure we have the latest values */
3757         save_fpu_regs();
3758         if (MACHINE_HAS_VX)
3759                 convert_vx_to_fp((freg_t *) fpu->fprs,
3760                                  (__vector128 *) vcpu->run->s.regs.vrs);
3761         else
3762                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3763         fpu->fpc = vcpu->run->s.regs.fpc;
3764
3765         vcpu_put(vcpu);
3766         return 0;
3767 }
3768
3769 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3770 {
3771         int rc = 0;
3772
3773         if (!is_vcpu_stopped(vcpu))
3774                 rc = -EBUSY;
3775         else {
3776                 vcpu->run->psw_mask = psw.mask;
3777                 vcpu->run->psw_addr = psw.addr;
3778         }
3779         return rc;
3780 }
3781
3782 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3783                                   struct kvm_translation *tr)
3784 {
3785         return -EINVAL; /* not implemented yet */
3786 }
3787
3788 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3789                               KVM_GUESTDBG_USE_HW_BP | \
3790                               KVM_GUESTDBG_ENABLE)
3791
3792 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3793                                         struct kvm_guest_debug *dbg)
3794 {
3795         int rc = 0;
3796
3797         vcpu_load(vcpu);
3798
3799         vcpu->guest_debug = 0;
3800         kvm_s390_clear_bp_data(vcpu);
3801
3802         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3803                 rc = -EINVAL;
3804                 goto out;
3805         }
3806         if (!sclp.has_gpere) {
3807                 rc = -EINVAL;
3808                 goto out;
3809         }
3810
3811         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3812                 vcpu->guest_debug = dbg->control;
3813                 /* enforce guest PER */
3814                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3815
3816                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3817                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3818         } else {
3819                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3820                 vcpu->arch.guestdbg.last_bp = 0;
3821         }
3822
3823         if (rc) {
3824                 vcpu->guest_debug = 0;
3825                 kvm_s390_clear_bp_data(vcpu);
3826                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3827         }
3828
3829 out:
3830         vcpu_put(vcpu);
3831         return rc;
3832 }
3833
3834 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3835                                     struct kvm_mp_state *mp_state)
3836 {
3837         int ret;
3838
3839         vcpu_load(vcpu);
3840
3841         /* CHECK_STOP and LOAD are not supported yet */
3842         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3843                                       KVM_MP_STATE_OPERATING;
3844
3845         vcpu_put(vcpu);
3846         return ret;
3847 }
3848
3849 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3850                                     struct kvm_mp_state *mp_state)
3851 {
3852         int rc = 0;
3853
3854         vcpu_load(vcpu);
3855
3856         /* user space knows about this interface - let it control the state */
3857         kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3858
3859         switch (mp_state->mp_state) {
3860         case KVM_MP_STATE_STOPPED:
3861                 rc = kvm_s390_vcpu_stop(vcpu);
3862                 break;
3863         case KVM_MP_STATE_OPERATING:
3864                 rc = kvm_s390_vcpu_start(vcpu);
3865                 break;
3866         case KVM_MP_STATE_LOAD:
3867                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3868                         rc = -ENXIO;
3869                         break;
3870                 }
3871                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3872                 break;
3873         case KVM_MP_STATE_CHECK_STOP:
3874                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3875         default:
3876                 rc = -ENXIO;
3877         }
3878
3879         vcpu_put(vcpu);
3880         return rc;
3881 }
3882
3883 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3884 {
3885         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3886 }
3887
3888 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3889 {
3890 retry:
3891         kvm_s390_vcpu_request_handled(vcpu);
3892         if (!kvm_request_pending(vcpu))
3893                 return 0;
3894         /*
3895          * If the guest prefix changed, re-arm the ipte notifier for the
3896          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3897          * This ensures that the ipte instruction for this request has
3898          * already finished. We might race against a second unmapper that
3899          * wants to set the blocking bit. Let's just retry the request loop.
3900          */
3901         if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
3902                 int rc;
3903                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3904                                           kvm_s390_get_prefix(vcpu),
3905                                           PAGE_SIZE * 2, PROT_WRITE);
3906                 if (rc) {
3907                         kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
3908                         return rc;
3909                 }
3910                 goto retry;
3911         }
3912
3913         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3914                 vcpu->arch.sie_block->ihcpu = 0xffff;
3915                 goto retry;
3916         }
3917
3918         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3919                 if (!ibs_enabled(vcpu)) {
3920                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3921                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3922                 }
3923                 goto retry;
3924         }
3925
3926         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3927                 if (ibs_enabled(vcpu)) {
3928                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3929                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3930                 }
3931                 goto retry;
3932         }
3933
3934         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3935                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3936                 goto retry;
3937         }
3938
3939         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3940                 /*
3941                  * Disable CMM virtualization; we will emulate the ESSA
3942                  * instruction manually, in order to provide additional
3943                  * functionalities needed for live migration.
3944                  */
3945                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3946                 goto retry;
3947         }
3948
3949         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3950                 /*
3951                  * Re-enable CMM virtualization if CMMA is available and
3952                  * CMM has been used.
3953                  */
3954                 if ((vcpu->kvm->arch.use_cmma) &&
3955                     (vcpu->kvm->mm->context.uses_cmm))
3956                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3957                 goto retry;
3958         }
3959
3960         /* nothing to do, just clear the request */
3961         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3962         /* we left the vsie handler, nothing to do, just clear the request */
3963         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3964
3965         return 0;
3966 }
3967
3968 static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3969 {
3970         struct kvm_vcpu *vcpu;
3971         union tod_clock clk;
3972         unsigned long i;
3973
3974         preempt_disable();
3975
3976         store_tod_clock_ext(&clk);
3977
3978         kvm->arch.epoch = gtod->tod - clk.tod;
3979         kvm->arch.epdx = 0;
3980         if (test_kvm_facility(kvm, 139)) {
3981                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3982                 if (kvm->arch.epoch > gtod->tod)
3983                         kvm->arch.epdx -= 1;
3984         }
3985
3986         kvm_s390_vcpu_block_all(kvm);
3987         kvm_for_each_vcpu(i, vcpu, kvm) {
3988                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3989                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3990         }
3991
3992         kvm_s390_vcpu_unblock_all(kvm);
3993         preempt_enable();
3994 }
3995
3996 void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
3997 {
3998         mutex_lock(&kvm->lock);
3999         __kvm_s390_set_tod_clock(kvm, gtod);
4000         mutex_unlock(&kvm->lock);
4001 }
4002
4003 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
4004 {
4005         if (!mutex_trylock(&kvm->lock))
4006                 return 0;
4007         __kvm_s390_set_tod_clock(kvm, gtod);
4008         mutex_unlock(&kvm->lock);
4009         return 1;
4010 }
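
/*
 * Illustrative sketch: a caller that must not sleep on kvm->lock (the
 * context here is assumed, not taken from this file) uses the trylock
 * variant above and defers the update when the lock is contended:
 */
static inline bool example_try_tod_update(struct kvm *kvm,
					  const struct kvm_s390_vm_tod_clock *gtod)
{
	/* returns 0 when kvm->lock was contended; caller retries later */
	return kvm_s390_try_set_tod_clock(kvm, gtod);
}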
4011
4012 /**
4013  * kvm_arch_fault_in_page - fault-in guest page if necessary
4014  * @vcpu: The corresponding virtual cpu
4015  * @gpa: Guest physical address
4016  * @writable: Whether the page should be writable or not
4017  *
4018  * Make sure that a guest page has been faulted-in on the host.
4019  *
4020  * Return: Zero on success, negative error code otherwise.
4021  */
4022 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
4023 {
4024         return gmap_fault(vcpu->arch.gmap, gpa,
4025                           writable ? FAULT_FLAG_WRITE : 0);
4026 }
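
/*
 * Illustrative sketch (hypothetical caller): an intercept handler could
 * fault in the page behind a guest physical address and then retry the
 * access that failed:
 */
static inline int example_fault_in_then_retry(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	long rc = kvm_arch_fault_in_page(vcpu, gpa, 1);

	return rc < 0 ? (int)rc : 0;	/* 0: the access may be retried */
}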
4027
4028 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
4029                                       unsigned long token)
4030 {
4031         struct kvm_s390_interrupt inti;
4032         struct kvm_s390_irq irq;
4033
4034         if (start_token) {
4035                 irq.u.ext.ext_params2 = token;
4036                 irq.type = KVM_S390_INT_PFAULT_INIT;
4037                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
4038         } else {
4039                 inti.type = KVM_S390_INT_PFAULT_DONE;
4040                 inti.parm64 = token;
4041                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
4042         }
4043 }
4044
4045 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
4046                                      struct kvm_async_pf *work)
4047 {
4048         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
4049         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
4050
4051         return true;
4052 }
4053
4054 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
4055                                  struct kvm_async_pf *work)
4056 {
4057         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
4058         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
4059 }
4060
4061 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
4062                                struct kvm_async_pf *work)
4063 {
4064         /* s390 will always inject the page directly */
4065 }
4066
4067 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4068 {
4069         /*
4070          * s390 will always inject the page directly,
4071          * but we still want check_async_completion to clean up
4072          */
4073         return true;
4074 }
4075
4076 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4077 {
4078         hva_t hva;
4079         struct kvm_arch_async_pf arch;
4080
4081         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4082                 return false;
4083         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4084             vcpu->arch.pfault_compare)
4085                 return false;
4086         if (psw_extint_disabled(vcpu))
4087                 return false;
4088         if (kvm_s390_vcpu_has_irq(vcpu, 0))
4089                 return false;
4090         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4091                 return false;
4092         if (!vcpu->arch.gmap->pfault_enabled)
4093                 return false;
4094
4095         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4096         hva += current->thread.gmap_addr & ~PAGE_MASK;
4097         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4098                 return false;
4099
4100         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4101 }
4102
4103 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4104 {
4105         int rc, cpuflags;
4106
4107         /*
4108          * On s390 notifications for arriving pages will be delivered directly
4109          * to the guest but the housekeeping for completed pfaults is
4110          * handled outside the worker.
4111          */
        kvm_check_async_pf_completion(vcpu);

        vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
        vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

        if (need_resched())
                schedule();

        if (!kvm_is_ucontrol(vcpu->kvm)) {
                rc = kvm_s390_deliver_pending_interrupts(vcpu);
                if (rc)
                        return rc;
        }

        rc = kvm_s390_handle_requests(vcpu);
        if (rc)
                return rc;

        if (guestdbg_enabled(vcpu)) {
                kvm_s390_backup_guest_per_regs(vcpu);
                kvm_s390_patch_guest_per_regs(vcpu);
        }

        clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);

        vcpu->arch.sie_block->icptcode = 0;
        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
        VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
        trace_kvm_s390_sie_enter(vcpu, cpuflags);

        return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
        struct kvm_s390_pgm_info pgm_info = {
                .code = PGM_ADDRESSING,
        };
        u8 opcode, ilen;
        int rc;

        VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
        trace_kvm_s390_sie_fault(vcpu);

        /*
         * We want to inject an addressing exception, which is defined as a
         * suppressing or terminating exception. However, since we came here
         * by a DAT access exception, the PSW still points to the faulting
         * instruction since DAT exceptions are nullifying. So we've got
         * to look up the current opcode to get the length of the instruction
         * to be able to forward the PSW.
         */
        rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
        ilen = insn_length(opcode);
        if (rc < 0) {
                return rc;
        } else if (rc) {
                /*
                 * Instruction-Fetching Exceptions - we can't detect the
                 * ilen. Forward the PSW by an arbitrary ilc; injection will
                 * take care of nullification if necessary.
                 */
                pgm_info = vcpu->arch.pgm;
                ilen = 4;
        }
        pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
        kvm_s390_forward_psw(vcpu, ilen);
        return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

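/*
 * Triage a SIE exit: reinject machine checks (-EINTR), run the intercept
 * handlers for a non-zero icptcode, treat other non-fault exits as null
 * exits, and turn host page faults into ucontrol exits, async pfaults or
 * a synchronous fault-in.
 */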
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
        struct mcck_volatile_info *mcck_info;
        struct sie_page *sie_page;

        VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
                   vcpu->arch.sie_block->icptcode);
        trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

        if (guestdbg_enabled(vcpu))
                kvm_s390_restore_guest_per_regs(vcpu);

        vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
        vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

        if (exit_reason == -EINTR) {
                VCPU_EVENT(vcpu, 3, "%s", "machine check");
                sie_page = container_of(vcpu->arch.sie_block,
                                        struct sie_page, sie_block);
                mcck_info = &sie_page->mcck_info;
                kvm_s390_reinject_machine_check(vcpu, mcck_info);
                return 0;
        }

        if (vcpu->arch.sie_block->icptcode > 0) {
                int rc = kvm_handle_sie_intercept(vcpu);

                if (rc != -EOPNOTSUPP)
                        return rc;
                vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
                vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
                vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
                vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
                return -EREMOTE;
        } else if (exit_reason != -EFAULT) {
                vcpu->stat.exit_null++;
                return 0;
        } else if (kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
                vcpu->run->s390_ucontrol.trans_exc_code =
                                                current->thread.gmap_addr;
                vcpu->run->s390_ucontrol.pgm_code = 0x10;
                return -EREMOTE;
        } else if (current->thread.gmap_pfault) {
                trace_kvm_s390_major_guest_pfault(vcpu);
                current->thread.gmap_pfault = 0;
                if (kvm_arch_setup_async_pf(vcpu))
                        return 0;
                vcpu->stat.pfault_sync++;
                return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
        }
        return vcpu_post_run_fault_in_sie(vcpu);
}

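/*
 * The inner run loop. kvm->srcu is held around everything except the
 * actual SIE entry, and guest_enter/guest_exit bracket the time spent in
 * SIE with interrupts disabled so that PF_VCPU handling and cpu timer
 * accounting stay consistent. For protected guests the general purpose
 * registers are staged through the SIE page.
 */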
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
        int rc, exit_reason;
        struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

        /*
         * We try to hold kvm->srcu during most of vcpu_run (except when
         * running the guest), so that memslots (and other stuff) are
         * protected.
         */
        kvm_vcpu_srcu_read_lock(vcpu);

        do {
                rc = vcpu_pre_run(vcpu);
                if (rc)
                        break;

                kvm_vcpu_srcu_read_unlock(vcpu);
                /*
                 * As PF_VCPU will be used in the fault handler, there must
                 * be no uaccess between guest_enter and guest_exit.
                 */
                local_irq_disable();
                guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        memcpy(sie_page->pv_grregs,
                               vcpu->run->s.regs.gprs,
                               sizeof(sie_page->pv_grregs));
                }
                if (test_cpu_flag(CIF_FPU))
                        load_fpu_regs();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        memcpy(vcpu->run->s.regs.gprs,
                               sie_page->pv_grregs,
                               sizeof(sie_page->pv_grregs));
                        /*
                         * We're not allowed to inject interrupts on intercepts
                         * that leave the guest state in an "in-between" state
                         * where the next SIE entry will do a continuation.
                         * Fence interrupts in our "internal" PSW.
                         */
                        if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
                            vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
                                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
                        }
                }
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
                guest_exit_irqoff();
                local_irq_enable();
                kvm_vcpu_srcu_read_lock(vcpu);

                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

        kvm_vcpu_srcu_read_unlock(vcpu);
        return rc;
}

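/*
 * Registers shared with userspace via kvm_run are only transferred into
 * the SIE control block when the corresponding KVM_SYNC_* bit is set in
 * kvm_dirty_regs. An illustrative (hypothetical) userspace sequence:
 *
 *      run->s.regs.prefix = new_prefix;
 *      run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *      ioctl(vcpu_fd, KVM_RUN, 0);
 */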
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
        struct kvm_run *kvm_run = vcpu->run;
        struct runtime_instr_cb *riccb;
        struct gs_cb *gscb;

        riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
        gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
                vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
                vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
                vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
                vcpu->arch.pfault_token = kvm_run->s.regs.pft;
                vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
                vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
                if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
                        kvm_clear_async_pf_completion_queue(vcpu);
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
                vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
                vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
                VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
        }
        /*
         * If userspace sets the riccb (e.g. after migration) to a valid state,
         * we should enable RI here instead of doing the lazy enablement.
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
            test_kvm_facility(vcpu->kvm, 64) &&
            riccb->v &&
            !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
                vcpu->arch.sie_block->ecb3 |= ECB3_RI;
        }
        /*
         * If userspace sets the gscb (e.g. after migration) to non-zero,
         * we should enable GS here instead of doing the lazy enablement.
         */
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
            test_kvm_facility(vcpu->kvm, 133) &&
            gscb->gssm &&
            !vcpu->arch.gs_enabled) {
                VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
                vcpu->arch.sie_block->ecb |= ECB_GS;
                vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
                vcpu->arch.gs_enabled = 1;
        }
        if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
            test_kvm_facility(vcpu->kvm, 82)) {
                vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
                vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
        }
        if (MACHINE_HAS_GS) {
                preempt_disable();
                __ctl_set_bit(2, 4);
                if (current->thread.gs_cb) {
                        vcpu->arch.host_gscb = current->thread.gs_cb;
                        save_gs_cb(vcpu->arch.host_gscb);
                }
                if (vcpu->arch.gs_enabled) {
                        current->thread.gs_cb = (struct gs_cb *)
                                                &vcpu->run->s.regs.gscb;
                        restore_gs_cb(current->thread.gs_cb);
                }
                preempt_enable();
        }
        /* SIE will load etoken directly from SDNX and therefore kvm_run */
}

static void sync_regs(struct kvm_vcpu *vcpu)
{
        struct kvm_run *kvm_run = vcpu->run;

        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
                /* some control register changes require a tlb flush */
                kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
                kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
                vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
        }
        save_access_regs(vcpu->arch.host_acrs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        /* save host (userspace) fprs/vrs */
        save_fpu_regs();
        vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
        vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
        if (MACHINE_HAS_VX)
                current->thread.fpu.regs = vcpu->run->s.regs.vrs;
        else
                current->thread.fpu.regs = vcpu->run->s.regs.fprs;
        current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
        if (test_fp_ctl(current->thread.fpu.fpc))
                /* User space provided an invalid FPC, let's clear it */
                current->thread.fpu.fpc = 0;

        /* Sync fmt2 only data */
        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
                sync_regs_fmt2(vcpu);
        } else {
                /*
                 * In several places we have to modify our internal view to
                 * not do things that are disallowed by the ultravisor. For
                 * example we must not inject interrupts after specific exits
                 * (e.g. 112 prefix page not secure). We do this by turning
                 * off the machine check, external and I/O interrupt bits
                 * of our PSW copy. To avoid getting validity intercepts, we
                 * only accept the condition code from userspace.
                 */
                vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
                vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
                                                   PSW_MASK_CC;
        }

        kvm_run->kvm_dirty_regs = 0;
}

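/*
 * Mirror image of sync_regs_fmt2(): copy the format-2 only state back
 * into kvm_run and restore the host guarded storage control block.
 */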
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
        struct kvm_run *kvm_run = vcpu->run;

        kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
        kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
        kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
        kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
        kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
        if (MACHINE_HAS_GS) {
                preempt_disable();
                __ctl_set_bit(2, 4);
                if (vcpu->arch.gs_enabled)
                        save_gs_cb(current->thread.gs_cb);
                current->thread.gs_cb = vcpu->arch.host_gscb;
                restore_gs_cb(vcpu->arch.host_gscb);
                if (!vcpu->arch.host_gscb)
                        __ctl_clear_bit(2, 4);
                vcpu->arch.host_gscb = NULL;
                preempt_enable();
        }
        /* SIE will save etoken directly into SDNX and therefore kvm_run */
}

static void store_regs(struct kvm_vcpu *vcpu)
{
        struct kvm_run *kvm_run = vcpu->run;

        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
        kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
        kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
        kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
        kvm_run->s.regs.pft = vcpu->arch.pfault_token;
        kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
        kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_access_regs(vcpu->arch.host_acrs);
        /* Save guest register state */
        save_fpu_regs();
        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        /* Restore will be done lazily at return */
        current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
        current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
        if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
                store_regs_fmt2(vcpu);
}

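/*
 * The KVM_RUN backend: validate the sync-reg masks, sync the userspace
 * state in, loop in __vcpu_run() until userspace attention is required,
 * then store the state back. -EREMOTE means kvm_run has already been
 * prepared for userspace and is translated to a plain exit.
 */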
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
        struct kvm_run *kvm_run = vcpu->run;
        int rc;

        if (kvm_run->immediate_exit)
                return -EINTR;

        if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
            kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
                return -EINVAL;

        vcpu_load(vcpu);

        if (guestdbg_exit_pending(vcpu)) {
                kvm_s390_prepare_debug_exit(vcpu);
                rc = 0;
                goto out;
        }

        kvm_sigset_activate(vcpu);

        /*
         * There is no need to check the return value of kvm_s390_vcpu_start()
         * as it can only fail for a protected guest, and a protected guest
         * implies that the user cpu state control is set.
         */
        if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
                kvm_s390_vcpu_start(vcpu);
        } else if (is_vcpu_stopped(vcpu)) {
                pr_err_ratelimited("can't run stopped vcpu %d\n",
                                   vcpu->vcpu_id);
                rc = -EINVAL;
                goto out;
        }

        sync_regs(vcpu);
        enable_cpu_timer_accounting(vcpu);

        might_fault();
        rc = __vcpu_run(vcpu);

        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
                rc = -EINTR;
        }

        if (guestdbg_exit_pending(vcpu) && !rc) {
                kvm_s390_prepare_debug_exit(vcpu);
                rc = 0;
        }

        if (rc == -EREMOTE) {
                /* userspace support is needed, kvm_run has been prepared */
                rc = 0;
        }

        disable_cpu_timer_accounting(vcpu);
        store_regs(vcpu);

        kvm_sigset_deactivate(vcpu);

        vcpu->stat.exit_userspace++;
out:
        vcpu_put(vcpu);
        return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
        unsigned char archmode = 1;
        freg_t fprs[NUM_FPRS];
        unsigned int px;
        u64 clkcomp, cputm;
        int rc;

        px = kvm_s390_get_prefix(vcpu);
        if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
                if (write_guest_abs(vcpu, 163, &archmode, 1))
                        return -EFAULT;
                gpa = 0;
        } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
                if (write_guest_real(vcpu, 163, &archmode, 1))
                        return -EFAULT;
                gpa = px;
        } else
                gpa -= __LC_FPREGS_SAVE_AREA;

        /* manually convert vector registers if necessary */
        if (MACHINE_HAS_VX) {
                convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
                                     fprs, 128);
        } else {
                rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
                                     vcpu->run->s.regs.fprs, 128);
        }
        rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
                              vcpu->run->s.regs.gprs, 128);
        rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
                              &vcpu->arch.sie_block->gpsw, 16);
        rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
                              &px, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
                              &vcpu->run->s.regs.fpc, 4);
        rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
                              &vcpu->arch.sie_block->todpr, 4);
        cputm = kvm_s390_get_cpu_timer(vcpu);
        rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
                              &cputm, 8);
        clkcomp = vcpu->arch.sie_block->ckc >> 8;
        rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
                              &clkcomp, 8);
        rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
                              &vcpu->run->s.regs.acrs, 64);
        rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
                              &vcpu->arch.sie_block->gcr, 128);
        return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
        /*
         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
         * switch in the run ioctl. Let's update our copies before we save
         * them into the save area.
         */
        save_fpu_regs();
        vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
        save_access_regs(vcpu->run->s.regs.acrs);

        return kvm_s390_store_status_unloaded(vcpu, addr);
}

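/*
 * IBS is a performance facility that may only be in effect while no more
 * than one VCPU is running. The start/stop code below therefore enables
 * it for the last running VCPU and disables it again as soon as a second
 * VCPU is started.
 */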
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
        unsigned long i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                __disable_ibs_on_vcpu(vcpu);
        }
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        if (!sclp.has_ibs)
                return;
        kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
        kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
        int i, online_vcpus, r = 0, started_vcpus = 0;

        if (!is_vcpu_stopped(vcpu))
                return 0;

        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

        /* Let's tell the UV that we want to change into the operating state */
        if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
                if (r) {
                        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
                        return r;
                }
        }

        for (i = 0; i < online_vcpus; i++) {
                if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
                        started_vcpus++;
        }

        if (started_vcpus == 0) {
                /* we're the only active VCPU -> speed it up */
                __enable_ibs_on_vcpu(vcpu);
        } else if (started_vcpus == 1) {
                /*
                 * As we are starting a second VCPU, we have to disable
                 * the IBS facility on all VCPUs to remove potentially
                 * outstanding ENABLE requests.
                 */
                __disable_ibs_on_all_vcpus(vcpu->kvm);
        }

        kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
        /*
         * The real PSW might have changed due to a RESTART interpreted by the
         * ultravisor. We block all interrupts and let the next sie exit
         * refresh our view.
         */
        if (kvm_s390_pv_cpu_is_protected(vcpu))
                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
        /*
         * Another VCPU might have used IBS while we were offline.
         * Let's play safe and flush the VCPU at startup.
         */
        kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
        return 0;
}

int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
        int i, online_vcpus, r = 0, started_vcpus = 0;
        struct kvm_vcpu *started_vcpu = NULL;

        if (is_vcpu_stopped(vcpu))
                return 0;

        trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
        /* Only one cpu at a time may enter/leave the STOPPED state. */
        spin_lock(&vcpu->kvm->arch.start_stop_lock);
        online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

        /* Let's tell the UV that we want to change into the stopped state */
        if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
                if (r) {
                        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
                        return r;
                }
        }

        /*
         * Set the VCPU to STOPPED and THEN clear the interrupt flag,
         * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
         * have been fully processed. This will ensure that the VCPU
         * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
         */
        kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
        kvm_s390_clear_stop_irq(vcpu);

        __disable_ibs_on_vcpu(vcpu);

        for (i = 0; i < online_vcpus; i++) {
                struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);

                if (!is_vcpu_stopped(tmp)) {
                        started_vcpus++;
                        started_vcpu = tmp;
                }
        }

        if (started_vcpus == 1) {
                /*
                 * As we only have one VCPU left, we want to enable the
                 * IBS facility for that VCPU to speed it up.
                 */
                __enable_ibs_on_vcpu(started_vcpu);
        }

        spin_unlock(&vcpu->kvm->arch.start_stop_lock);
        return 0;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                                     struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_CSS_SUPPORT:
                if (!vcpu->kvm->arch.css_support) {
                        vcpu->kvm->arch.css_support = 1;
                        VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
                        trace_kvm_s390_enable_css(vcpu->kvm);
                }
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

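/*
 * Access the SIDA of a protected VCPU. The SIDA is the only guest memory
 * userspace may still read or write through the mem_op interface once a
 * VCPU is protected, which is why the regular logical path below rejects
 * protected VCPUs.
 */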
static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
                                  struct kvm_s390_mem_op *mop)
{
        void __user *uaddr = (void __user *)mop->buf;
        int r = 0;

        if (mop->flags || !mop->size)
                return -EINVAL;
        if (mop->size + mop->sida_offset < mop->size)
                return -EINVAL;
        if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
                return -E2BIG;
        if (!kvm_s390_pv_cpu_is_protected(vcpu))
                return -EINVAL;

        switch (mop->op) {
        case KVM_S390_MEMOP_SIDA_READ:
                if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
                                 mop->sida_offset), mop->size))
                        r = -EFAULT;
                break;
        case KVM_S390_MEMOP_SIDA_WRITE:
                if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
                                   mop->sida_offset), uaddr, mop->size))
                        r = -EFAULT;
                break;
        }
        return r;
}

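/*
 * Read or write guest logical memory on behalf of userspace. An
 * illustrative (hypothetical) userspace invocation:
 *
 *      struct kvm_s390_mem_op op = {
 *              .gaddr = guest_addr,
 *              .size  = len,
 *              .op    = KVM_S390_MEMOP_LOGICAL_READ,
 *              .buf   = (__u64)(unsigned long)buffer,
 *              .ar    = 0,
 *      };
 *      ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */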
static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
                                 struct kvm_s390_mem_op *mop)
{
        void __user *uaddr = (void __user *)mop->buf;
        void *tmpbuf = NULL;
        int r = 0;
        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
                                    | KVM_S390_MEMOP_F_CHECK_ONLY
                                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;

        if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
                return -EINVAL;
        if (mop->size > MEM_OP_MAX_SIZE)
                return -E2BIG;
        if (kvm_s390_pv_cpu_is_protected(vcpu))
                return -EINVAL;
        if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
                if (access_key_invalid(mop->key))
                        return -EINVAL;
        } else {
                mop->key = 0;
        }
        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
                tmpbuf = vmalloc(mop->size);
                if (!tmpbuf)
                        return -ENOMEM;
        }

        switch (mop->op) {
        case KVM_S390_MEMOP_LOGICAL_READ:
                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
                        r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
                                            GACC_FETCH, mop->key);
                        break;
                }
                r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
                                        mop->size, mop->key);
                if (r == 0) {
                        if (copy_to_user(uaddr, tmpbuf, mop->size))
                                r = -EFAULT;
                }
                break;
        case KVM_S390_MEMOP_LOGICAL_WRITE:
                if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
                        r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
                                            GACC_STORE, mop->key);
                        break;
                }
                if (copy_from_user(tmpbuf, uaddr, mop->size)) {
                        r = -EFAULT;
                        break;
                }
                r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
                                         mop->size, mop->key);
                break;
        }

        if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
                kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

        vfree(tmpbuf);
        return r;
}

static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
                                     struct kvm_s390_mem_op *mop)
{
        int r, srcu_idx;

        srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        switch (mop->op) {
        case KVM_S390_MEMOP_LOGICAL_READ:
        case KVM_S390_MEMOP_LOGICAL_WRITE:
                r = kvm_s390_vcpu_mem_op(vcpu, mop);
                break;
        case KVM_S390_MEMOP_SIDA_READ:
        case KVM_S390_MEMOP_SIDA_WRITE:
                /* the vcpu->mutex protects us against the sida going away */
                r = kvm_s390_vcpu_sida_op(vcpu, mop);
                break;
        default:
                r = -EINVAL;
        }

        srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
        return r;
}

long kvm_arch_vcpu_async_ioctl(struct file *filp,
                               unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;

        switch (ioctl) {
        case KVM_S390_IRQ: {
                struct kvm_s390_irq s390irq;

                if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
                        return -EFAULT;
                return kvm_s390_inject_vcpu(vcpu, &s390irq);
        }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
                struct kvm_s390_irq s390irq = {};

                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        return -EFAULT;
                if (s390int_to_s390irq(&s390int, &s390irq))
                        return -EINVAL;
                return kvm_s390_inject_vcpu(vcpu, &s390irq);
        }
        }
        return -ENOIOCTLCMD;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
        int idx;
        long r;
        u16 rc, rrc;

        vcpu_load(vcpu);

        switch (ioctl) {
        case KVM_S390_STORE_STATUS:
                idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_s390_store_status_unloaded(vcpu, arg);
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;

                r = -EFAULT;
                if (copy_from_user(&psw, argp, sizeof(psw)))
                        break;
                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
                break;
        }
        case KVM_S390_CLEAR_RESET:
                r = 0;
                kvm_arch_vcpu_ioctl_clear_reset(vcpu);
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
                                          UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
                                   rc, rrc);
                }
                break;
        case KVM_S390_INITIAL_RESET:
                r = 0;
                kvm_arch_vcpu_ioctl_initial_reset(vcpu);
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
                                          UVC_CMD_CPU_RESET_INITIAL,
                                          &rc, &rrc);
                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
                                   rc, rrc);
                }
                break;
        case KVM_S390_NORMAL_RESET:
                r = 0;
                kvm_arch_vcpu_ioctl_normal_reset(vcpu);
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
                                          UVC_CMD_CPU_RESET, &rc, &rrc);
                        VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
                                   rc, rrc);
                }
                break;
        case KVM_SET_ONE_REG:
        case KVM_GET_ONE_REG: {
                struct kvm_one_reg reg;

                r = -EINVAL;
                if (kvm_s390_pv_cpu_is_protected(vcpu))
                        break;
                r = -EFAULT;
                if (copy_from_user(&reg, argp, sizeof(reg)))
                        break;
                if (ioctl == KVM_SET_ONE_REG)
                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
                else
                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
                break;
        }
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_S390_UCAS_MAP: {
                struct kvm_s390_ucas_mapping ucasmap;

                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
                        r = -EFAULT;
                        break;
                }

                if (!kvm_is_ucontrol(vcpu->kvm)) {
                        r = -EINVAL;
                        break;
                }

                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
                                     ucasmap.vcpu_addr, ucasmap.length);
                break;
        }
        case KVM_S390_UCAS_UNMAP: {
                struct kvm_s390_ucas_mapping ucasmap;

                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
                        r = -EFAULT;
                        break;
                }

                if (!kvm_is_ucontrol(vcpu->kvm)) {
                        r = -EINVAL;
                        break;
                }

                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
                                       ucasmap.length);
                break;
        }
#endif
        case KVM_S390_VCPU_FAULT: {
                r = gmap_fault(vcpu->arch.gmap, arg, 0);
                break;
        }
        case KVM_ENABLE_CAP: {
                struct kvm_enable_cap cap;

                r = -EFAULT;
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        break;
                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
                break;
        }
        case KVM_S390_MEM_OP: {
                struct kvm_s390_mem_op mem_op;

                if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
                        r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
                else
                        r = -EFAULT;
                break;
        }
        case KVM_S390_SET_IRQ_STATE: {
                struct kvm_s390_irq_state irq_state;

                r = -EFAULT;
                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
                        break;
                if (irq_state.len > VCPU_IRQS_MAX_BUF ||
                    irq_state.len == 0 ||
                    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
                        r = -EINVAL;
                        break;
                }
                /* do not use irq_state.flags, it will break old QEMUs */
                r = kvm_s390_set_irq_state(vcpu,
                                           (void __user *) irq_state.buf,
                                           irq_state.len);
                break;
        }
        case KVM_S390_GET_IRQ_STATE: {
                struct kvm_s390_irq_state irq_state;

                r = -EFAULT;
                if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
                        break;
                if (irq_state.len == 0) {
                        r = -EINVAL;
                        break;
                }
                /* do not use irq_state.flags, it will break old QEMUs */
                r = kvm_s390_get_irq_state(vcpu,
                                           (__u8 __user *) irq_state.buf,
                                           irq_state.len);
                break;
        }
        default:
                r = -ENOTTY;
        }

        vcpu_put(vcpu);
        return r;
}

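/*
 * For user controlled virtual machines, the SIE control block is exposed
 * to userspace by mmap()ing the vcpu fd at KVM_S390_SIE_PAGE_OFFSET;
 * anything else gets SIGBUS.
 */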
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
        if (vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET &&
            kvm_is_ucontrol(vcpu->kvm)) {
                vmf->page = virt_to_page(vcpu->arch.sie_block);
                get_page(vmf->page);
                return 0;
        }
#endif
        return VM_FAULT_SIGBUS;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   const struct kvm_memory_slot *old,
                                   struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
{
        gpa_t size;

        /* When we are protected, we should not change the memory slots */
        if (kvm_s390_pv_get_handle(kvm))
                return -EINVAL;

        if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
                return 0;

        /*
         * A few sanity checks. Memory slots have to start and end at a
         * segment boundary (1MB). The memory in userland may be fragmented
         * into various different vmas. It is okay to mmap() and munmap()
         * stuff in this slot at any time after doing this call.
         */

        if (new->userspace_addr & 0xffffful)
                return -EINVAL;

        size = new->npages * PAGE_SIZE;
        if (size & 0xffffful)
                return -EINVAL;

        if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
                return -EINVAL;

        return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *old,
                                   const struct kvm_memory_slot *new,
                                   enum kvm_mr_change change)
{
        int rc = 0;

        switch (change) {
        case KVM_MR_DELETE:
                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
                                        old->npages * PAGE_SIZE);
                break;
        case KVM_MR_MOVE:
                rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
                                        old->npages * PAGE_SIZE);
                if (rc)
                        break;
                fallthrough;
        case KVM_MR_CREATE:
                rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
                                      new->base_gfn * PAGE_SIZE,
                                      new->npages * PAGE_SIZE);
                break;
        case KVM_MR_FLAGS_ONLY:
                break;
        default:
                WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
        }
        if (rc)
                pr_warn("failed to commit memory region\n");
}

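/*
 * Derive from the SCLP hmfai field a mask of the facility bits in the
 * i-th doubleword of the facility list that are not subject to hypervisor
 * management and may therefore be passed through to guests;
 * kvm_s390_init() below uses it to extend kvm_s390_fac_base.
 */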
static inline unsigned long nonhyp_mask(int i)
{
        unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

        return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

static int __init kvm_s390_init(void)
{
        int i;

        if (!sclp.has_sief2) {
                pr_info("SIE is not available\n");
                return -ENODEV;
        }

        if (nested && hpage) {
                pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
                return -EINVAL;
        }

        for (i = 0; i < 16; i++)
                kvm_s390_fac_base[i] |=
                        stfle_fac_list[i] & nonhyp_mask(i);

        return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
        kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");