/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				vcpu->arch.vsie_block->epoch -= *delta;
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
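
/*
 * For illustration (an editor's note, derived from the loop below, not
 * from the original source): subfunction availability bits are recorded
 * MSB-first, so subfunction number i lands in byte i / 8 under bit mask
 * 0x80 >> (i % 8).  For example:
 *
 *	plo subfunction 0  -> plo[0] |= 0x80
 *	plo subfunction 9  -> plo[1] |= 0x40
 *	plo subfunction 26 -> plo[3] |= 0x20
 */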

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	default:
		r = 0;
	}
	return r;
}
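
/*
 * Usage sketch (editor's illustration): userspace probes these extensions
 * with the KVM_CHECK_EXTENSION ioctl; "vm_fd" below is assumed to be a VM
 * file descriptor obtained via KVM_CREATE_VM:
 *
 *	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *	int memop_max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A return value of 0 means the capability is absent; positive values
 * carry the limits computed above (e.g. MEM_OP_MAX_SIZE).
 */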

static void kvm_s390_sync_dirty_log(struct kvm *kvm,
					struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
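
/*
 * Usage sketch (editor's illustration; assumptions: "vm_fd" is the VM fd,
 * slot 0 exists and was created with KVM_MEM_LOG_DIRTY_PAGES, and "bitmap"
 * holds at least one bit per page of the slot):
 *
 *	struct kvm_dirty_log log = {
 *		.slot = 0,
 *		.dirty_bitmap = bitmap,
 *	};
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *		perror("KVM_GET_DIRTY_LOG");
 *
 * Each call returns the bits accumulated since the previous call, since
 * the log is cleared on read as seen above.
 */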

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			kvm->arch.float_int.ais_enabled = 1;
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
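
/*
 * Usage sketch for enabling one of the VM capabilities above ("vm_fd" is
 * an assumed VM file descriptor; flags must be zero, as checked above):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_USER_SIGP,
 *	};
 *	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
 *		perror("KVM_ENABLE_CAP");
 *
 * Capabilities that change the facility lists (e.g. KVM_CAP_S390_RI or
 * KVM_CAP_S390_VECTOR_REGISTERS) must be enabled before the first VCPU
 * is created, otherwise they fail with -EBUSY.
 */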

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
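
/*
 * Usage sketch: the memory limit is set through the VM device-attribute
 * interface before any VCPU exists; "vm_fd" is an assumed VM fd and the
 * 1 GiB value is only an example:
 *
 *	__u64 limit = 1ULL << 30;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)(unsigned long)&limit,
 *	};
 *	if (ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
 *		perror("KVM_SET_DEVICE_ATTR");
 */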

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
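
/*
 * Usage sketch: key wrapping is toggled with the same device-attribute
 * ioctl, with no payload needed ("vm_fd" is an assumed VM fd):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * Note that each ENABLE call regenerates the wrapping key mask, so
 * enabling twice yields a fresh random mask.
 */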

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the last slot. They should be sorted by base_gfn, so the
		 * last slot is also the one at the end of the address space.
		 * We have verified above that at least one slot is present.
		 */
		ms = slots->memslots + slots->used_slots - 1;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
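
/*
 * Bitmap sizing example for the allocation above (editor's worked
 * arithmetic): a guest whose last slot ends at 4 GiB covers 0x100000
 * (1048576) 4 KiB pages; roundup() to BITS_PER_LONG leaves that value
 * unchanged on 64-bit, and ram_pages / 8 = 131072 bytes, i.e. a 128 KiB
 * pgste_bitmap with one bit per guest page.
 */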

/*
 * Must be called with kvm->lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int idx, res = -ENXIO;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		idx = srcu_read_lock(&kvm->srcu);
		res = kvm_s390_vm_start_migration(kvm);
		srcu_read_unlock(&kvm->srcu, idx);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
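
/*
 * Usage sketch: the TOD base is programmed via the KVM_S390_VM_TOD group
 * ("vm_fd" is an assumed VM fd, "tod" an example value):
 *
 *	__u64 tod = 0;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)(unsigned long)&tod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * KVM_S390_VM_TOD_HIGH currently only accepts 0, as checked above.
 */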

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
					 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
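
/*
 * Usage sketch for the storage-key ioctl pair; "vm_fd" and the page count
 * are assumptions of the example:
 *
 *	__u8 keys[256];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn     = 0,
 *		.count         = 256,
 *		.skeydata_addr = (__u64)(unsigned long)keys,
 *	};
 *	int r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 *
 * r == KVM_S390_GET_SKEYS_NONE signals that the guest does not use
 * storage keys at all, as returned above.
 */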

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

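/*
 * Usage sketch for the CMMA migration log below ("vm_fd" and the buffer
 * size are assumptions of the example; KVM_S390_CMMA_PEEK reads the values
 * without consuming dirty bits):
 *
 *	__u8 values[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = sizeof(values),
 *		.flags     = KVM_S390_CMMA_PEEK,
 *		.values    = (__u64)(unsigned long)values,
 *	};
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On return, log.start_gfn holds the guest frame the saved values start
 * at, and log.count the number of bytes actually stored.
 */
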
1432 /*
1433  * This function searches for the next page with dirty CMMA attributes, and
1434  * saves the attributes in the buffer up to either the end of the buffer or
1435  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1436  * no trailing clean bytes are saved.
1437  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1438  * output buffer will indicate 0 as length.
1439  */
1440 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1441                                   struct kvm_s390_cmma_log *args)
1442 {
1443         struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1444         unsigned long bufsize, hva, pgstev, i, next, cur;
1445         int srcu_idx, peek, r = 0, rr;
1446         u8 *res;
1447
1448         cur = args->start_gfn;
1449         i = next = pgstev = 0;
1450
1451         if (unlikely(!kvm->arch.use_cmma))
1452                 return -ENXIO;
1453         /* Invalid/unsupported flags were specified */
1454         if (args->flags & ~KVM_S390_CMMA_PEEK)
1455                 return -EINVAL;
1456         /* A non-peek query is only valid while a migration is in progress */
1457         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1458         if (!peek && !s)
1459                 return -EINVAL;
1460         /* CMMA is disabled or was not used, or the buffer has length zero */
1461         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1462         if (!bufsize || !kvm->mm->context.use_cmma) {
1463                 memset(args, 0, sizeof(*args));
1464                 return 0;
1465         }
1466
1467         if (!peek) {
1468                 /* We are not peeking, and there are no dirty pages */
1469                 if (!atomic64_read(&s->dirty_pages)) {
1470                         memset(args, 0, sizeof(*args));
1471                         return 0;
1472                 }
1473                 cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1474                                     args->start_gfn);
1475                 if (cur >= s->bitmap_size)      /* nothing found, loop back */
1476                         cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1477                 if (cur >= s->bitmap_size) {    /* still nothing (very unlikely) */
1478                         memset(args, 0, sizeof(*args));
1479                         return 0;
1480                 }
1481                 next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1482         }
1483
1484         res = vmalloc(bufsize);
1485         if (!res)
1486                 return -ENOMEM;
1487
1488         args->start_gfn = cur;
1489
1490         down_read(&kvm->mm->mmap_sem);
1491         srcu_idx = srcu_read_lock(&kvm->srcu);
1492         while (i < bufsize) {
1493                 hva = gfn_to_hva(kvm, cur);
1494                 if (kvm_is_error_hva(hva)) {
1495                         r = -EFAULT;
1496                         break;
1497                 }
1498                 /* decrement only if we actually flipped the bit to 0 */
1499                 if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1500                         atomic64_dec(&s->dirty_pages);
1501                 r = get_pgste(kvm->mm, hva, &pgstev);
1502                 if (r < 0)
1503                         pgstev = 0;
1504                 /* save the value */
1505                 res[i++] = (pgstev >> 24) & 0x3;
1506                 /*
1507                  * If the next set bit is too far away, stop.
1508                  * If we have reached the previous "next", look up the one after it.
1509                  */
1510                 if (!peek) {
1511                         if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1512                                 break;
1513                         if (cur == next)
1514                                 next = find_next_bit(s->pgste_bitmap,
1515                                                      s->bitmap_size, cur + 1);
1516                         /* reached the end of the bitmap or of the buffer, stop */
1517                         if ((next >= s->bitmap_size) ||
1518                             (next >= args->start_gfn + bufsize))
1519                                 break;
1520                 }
1521                 cur++;
1522         }
1523         srcu_read_unlock(&kvm->srcu, srcu_idx);
1524         up_read(&kvm->mm->mmap_sem);
1525         args->count = i;
1526         args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1527
1528         rr = copy_to_user((void __user *)args->values, res, args->count);
1529         if (rr)
1530                 r = -EFAULT;
1531
1532         vfree(res);
1533         return r;
1534 }
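
/*
 * Editorial sketch (not part of the original file): peeking at CMMA values
 * through the KVM_S390_GET_CMMA_BITS vm ioctl; with KVM_S390_CMMA_PEEK set,
 * no migration mode is required, as checked above. vm_fd and values are
 * illustrative assumptions.
 *
 *	uint8_t values[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = sizeof(values),
 *		.flags     = KVM_S390_CMMA_PEEK,
 *		.values    = (uint64_t)(unsigned long)values,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *
 * On return, log.count holds the number of values actually stored and
 * log.start_gfn the gfn of the first one.
 */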
1535
1536 /*
1537  * This function sets the CMMA attributes for the given pages. If the input
1538  * buffer has zero length, no action is taken, otherwise the attributes are
1539  * set and the mm->context.use_cmma flag is set.
1540  */
1541 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1542                                   const struct kvm_s390_cmma_log *args)
1543 {
1544         unsigned long hva, mask, pgstev, i;
1545         uint8_t *bits;
1546         int srcu_idx, r = 0;
1547
1548         mask = args->mask;
1549
1550         if (!kvm->arch.use_cmma)
1551                 return -ENXIO;
1552         /* invalid/unsupported flags */
1553         if (args->flags != 0)
1554                 return -EINVAL;
1555         /* Enforce sane limit on memory allocation */
1556         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1557                 return -EINVAL;
1558         /* Nothing to do */
1559         if (args->count == 0)
1560                 return 0;
1561
1562         bits = vmalloc(sizeof(*bits) * args->count);
1563         if (!bits)
1564                 return -ENOMEM;
1565
1566         r = copy_from_user(bits, (void __user *)args->values, args->count);
1567         if (r) {
1568                 r = -EFAULT;
1569                 goto out;
1570         }
1571
1572         down_read(&kvm->mm->mmap_sem);
1573         srcu_idx = srcu_read_lock(&kvm->srcu);
1574         for (i = 0; i < args->count; i++) {
1575                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1576                 if (kvm_is_error_hva(hva)) {
1577                         r = -EFAULT;
1578                         break;
1579                 }
1580
1581                 pgstev = bits[i];
1582                 pgstev = pgstev << 24;
1583                 mask &= _PGSTE_GPS_USAGE_MASK;
1584                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1585         }
1586         srcu_read_unlock(&kvm->srcu, srcu_idx);
1587         up_read(&kvm->mm->mmap_sem);
1588
1589         if (!kvm->mm->context.use_cmma) {
1590                 down_write(&kvm->mm->mmap_sem);
1591                 kvm->mm->context.use_cmma = 1;
1592                 up_write(&kvm->mm->mmap_sem);
1593         }
1594 out:
1595         vfree(bits);
1596         return r;
1597 }
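
/*
 * Editorial sketch (not part of the original file): the restore side, as a
 * migration target would use it; mask selects which pgste bits to modify.
 * vm_fd, start, n and values are illustrative assumptions.
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = start,
 *		.count     = n,
 *		.mask      = ~0ULL,
 *		.values    = (uint64_t)(unsigned long)values,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_SET_CMMA_BITS, &log);
 */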
1598
1599 long kvm_arch_vm_ioctl(struct file *filp,
1600                        unsigned int ioctl, unsigned long arg)
1601 {
1602         struct kvm *kvm = filp->private_data;
1603         void __user *argp = (void __user *)arg;
1604         struct kvm_device_attr attr;
1605         int r;
1606
1607         switch (ioctl) {
1608         case KVM_S390_INTERRUPT: {
1609                 struct kvm_s390_interrupt s390int;
1610
1611                 r = -EFAULT;
1612                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1613                         break;
1614                 r = kvm_s390_inject_vm(kvm, &s390int);
1615                 break;
1616         }
1617         case KVM_ENABLE_CAP: {
1618                 struct kvm_enable_cap cap;
1619                 r = -EFAULT;
1620                 if (copy_from_user(&cap, argp, sizeof(cap)))
1621                         break;
1622                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1623                 break;
1624         }
1625         case KVM_CREATE_IRQCHIP: {
1626                 struct kvm_irq_routing_entry routing;
1627
1628                 r = -EINVAL;
1629                 if (kvm->arch.use_irqchip) {
1630                         /* Set up dummy routing. */
1631                         memset(&routing, 0, sizeof(routing));
1632                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1633                 }
1634                 break;
1635         }
1636         case KVM_SET_DEVICE_ATTR: {
1637                 r = -EFAULT;
1638                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1639                         break;
1640                 r = kvm_s390_vm_set_attr(kvm, &attr);
1641                 break;
1642         }
1643         case KVM_GET_DEVICE_ATTR: {
1644                 r = -EFAULT;
1645                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1646                         break;
1647                 r = kvm_s390_vm_get_attr(kvm, &attr);
1648                 break;
1649         }
1650         case KVM_HAS_DEVICE_ATTR: {
1651                 r = -EFAULT;
1652                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1653                         break;
1654                 r = kvm_s390_vm_has_attr(kvm, &attr);
1655                 break;
1656         }
1657         case KVM_S390_GET_SKEYS: {
1658                 struct kvm_s390_skeys args;
1659
1660                 r = -EFAULT;
1661                 if (copy_from_user(&args, argp,
1662                                    sizeof(struct kvm_s390_skeys)))
1663                         break;
1664                 r = kvm_s390_get_skeys(kvm, &args);
1665                 break;
1666         }
1667         case KVM_S390_SET_SKEYS: {
1668                 struct kvm_s390_skeys args;
1669
1670                 r = -EFAULT;
1671                 if (copy_from_user(&args, argp,
1672                                    sizeof(struct kvm_s390_skeys)))
1673                         break;
1674                 r = kvm_s390_set_skeys(kvm, &args);
1675                 break;
1676         }
1677         case KVM_S390_GET_CMMA_BITS: {
1678                 struct kvm_s390_cmma_log args;
1679
1680                 r = -EFAULT;
1681                 if (copy_from_user(&args, argp, sizeof(args)))
1682                         break;
1683                 r = kvm_s390_get_cmma_bits(kvm, &args);
1684                 if (!r) {
1685                         r = copy_to_user(argp, &args, sizeof(args));
1686                         if (r)
1687                                 r = -EFAULT;
1688                 }
1689                 break;
1690         }
1691         case KVM_S390_SET_CMMA_BITS: {
1692                 struct kvm_s390_cmma_log args;
1693
1694                 r = -EFAULT;
1695                 if (copy_from_user(&args, argp, sizeof(args)))
1696                         break;
1697                 r = kvm_s390_set_cmma_bits(kvm, &args);
1698                 break;
1699         }
1700         default:
1701                 r = -ENOTTY;
1702         }
1703
1704         return r;
1705 }
1706
1707 static int kvm_s390_query_ap_config(u8 *config)
1708 {
1709         u32 fcn_code = 0x04000000UL;
1710         u32 cc = 0;
1711
1712         memset(config, 0, 128);
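        /*
         * Editorial note on the inline assembly below: the function code is
         * loaded into GR0 and the address of the 128-byte info block into
         * GR2 before the hand-coded PQAP(QCI) opcode is executed; IPM/SRL
         * extract the resulting condition code. The EX_TABLE entry lets a
         * program exception on PQAP (e.g. when no AP instructions are
         * available) resume at label 1, leaving cc at its initial value 0
         * and the info block zeroed.
         */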
1713         asm volatile(
1714                 "lgr 0,%1\n"
1715                 "lgr 2,%2\n"
1716                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1717                 "0: ipm %0\n"
1718                 "srl %0,28\n"
1719                 "1:\n"
1720                 EX_TABLE(0b, 1b)
1721                 : "+r" (cc)
1722                 : "r" (fcn_code), "r" (config)
1723                 : "cc", "0", "2", "memory"
1724         );
1725
1726         return cc;
1727 }
1728
1729 static int kvm_s390_apxa_installed(void)
1730 {
1731         u8 config[128];
1732         int cc;
1733
1734         if (test_facility(12)) {
1735                 cc = kvm_s390_query_ap_config(config);
1736
1737                 if (cc)
1738                         pr_err("PQAP(QCI) failed with cc=%d\n", cc);
1739                 else
1740                         return config[0] & 0x40;
1741         }
1742
1743         return 0;
1744 }
1745
1746 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1747 {
1748         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1749
1750         if (kvm_s390_apxa_installed())
1751                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1752         else
1753                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1754 }
1755
1756 static u64 kvm_s390_get_initial_cpuid(void)
1757 {
1758         struct cpuid cpuid;
1759
1760         get_cpu_id(&cpuid);
1761         cpuid.version = 0xff;
1762         return *((u64 *) &cpuid);
1763 }
1764
1765 static void kvm_s390_crypto_init(struct kvm *kvm)
1766 {
1767         if (!test_kvm_facility(kvm, 76))
1768                 return;
1769
1770         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1771         kvm_s390_set_crycb_format(kvm);
1772
1773         /* Enable AES/DEA protected key functions by default */
1774         kvm->arch.crypto.aes_kw = 1;
1775         kvm->arch.crypto.dea_kw = 1;
1776         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1777                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1778         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1779                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1780 }
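
/*
 * Editorial sketch (not part of the original file): the AES/DEA wrapping
 * defaults set here can later be toggled by userspace through the
 * KVM_S390_VM_CRYPTO attribute group, e.g. (vm_fd is illustrative)
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_DISABLE_AES_KW,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */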
1781
1782 static void sca_dispose(struct kvm *kvm)
1783 {
1784         if (kvm->arch.use_esca)
1785                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1786         else
1787                 free_page((unsigned long)(kvm->arch.sca));
1788         kvm->arch.sca = NULL;
1789 }
1790
1791 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1792 {
1793         gfp_t alloc_flags = GFP_KERNEL;
1794         int i, rc;
1795         char debug_name[16];
1796         static unsigned long sca_offset;
1797
1798         rc = -EINVAL;
1799 #ifdef CONFIG_KVM_S390_UCONTROL
1800         if (type & ~KVM_VM_S390_UCONTROL)
1801                 goto out_err;
1802         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1803                 goto out_err;
1804 #else
1805         if (type)
1806                 goto out_err;
1807 #endif
1808
1809         rc = s390_enable_sie();
1810         if (rc)
1811                 goto out_err;
1812
1813         rc = -ENOMEM;
1814
1815         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1816
1817         kvm->arch.use_esca = 0; /* start with basic SCA */
1818         if (!sclp.has_64bscao)
1819                 alloc_flags |= GFP_DMA;
1820         rwlock_init(&kvm->arch.sca_lock);
1821         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1822         if (!kvm->arch.sca)
1823                 goto out_err;
1824         spin_lock(&kvm_lock);
1825         sca_offset += 16;
1826         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1827                 sca_offset = 0;
1828         kvm->arch.sca = (struct bsca_block *)
1829                         ((char *) kvm->arch.sca + sca_offset);
1830         spin_unlock(&kvm_lock);
1831
1832         sprintf(debug_name, "kvm-%u", current->pid);
1833
1834         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1835         if (!kvm->arch.dbf)
1836                 goto out_err;
1837
1838         kvm->arch.sie_page2 =
1839              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1840         if (!kvm->arch.sie_page2)
1841                 goto out_err;
1842
1843         /* Populate the facility mask initially. */
1844         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1845                sizeof(S390_lowcore.stfle_fac_list));
1846         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1847                 if (i < kvm_s390_fac_list_mask_size())
1848                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1849                 else
1850                         kvm->arch.model.fac_mask[i] = 0UL;
1851         }
1852
1853         /* Populate the facility list initially. */
1854         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1855         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1856                S390_ARCH_FAC_LIST_SIZE_BYTE);
1857
1858         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1859         set_kvm_facility(kvm->arch.model.fac_list, 74);
1860
1861         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1862         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1863
1864         kvm_s390_crypto_init(kvm);
1865
1866         mutex_init(&kvm->arch.float_int.ais_lock);
1867         kvm->arch.float_int.simm = 0;
1868         kvm->arch.float_int.nimm = 0;
1869         kvm->arch.float_int.ais_enabled = 0;
1870         spin_lock_init(&kvm->arch.float_int.lock);
1871         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1872                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1873         init_waitqueue_head(&kvm->arch.ipte_wq);
1874         mutex_init(&kvm->arch.ipte_mutex);
1875
1876         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1877         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1878
1879         if (type & KVM_VM_S390_UCONTROL) {
1880                 kvm->arch.gmap = NULL;
1881                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1882         } else {
1883                 if (sclp.hamax == U64_MAX)
1884                         kvm->arch.mem_limit = TASK_SIZE_MAX;
1885                 else
1886                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1887                                                     sclp.hamax + 1);
1888                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1889                 if (!kvm->arch.gmap)
1890                         goto out_err;
1891                 kvm->arch.gmap->private = kvm;
1892                 kvm->arch.gmap->pfault_enabled = 0;
1893         }
1894
1895         kvm->arch.css_support = 0;
1896         kvm->arch.use_irqchip = 0;
1897         kvm->arch.epoch = 0;
1898
1899         spin_lock_init(&kvm->arch.start_stop_lock);
1900         kvm_s390_vsie_init(kvm);
1901         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1902
1903         return 0;
1904 out_err:
1905         free_page((unsigned long)kvm->arch.sie_page2);
1906         debug_unregister(kvm->arch.dbf);
1907         sca_dispose(kvm);
1908         KVM_EVENT(3, "creation of vm failed: %d", rc);
1909         return rc;
1910 }
1911
1912 bool kvm_arch_has_vcpu_debugfs(void)
1913 {
1914         return false;
1915 }
1916
1917 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1918 {
1919         return 0;
1920 }
1921
1922 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1923 {
1924         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1925         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1926         kvm_s390_clear_local_irqs(vcpu);
1927         kvm_clear_async_pf_completion_queue(vcpu);
1928         if (!kvm_is_ucontrol(vcpu->kvm))
1929                 sca_del_vcpu(vcpu);
1930
1931         if (kvm_is_ucontrol(vcpu->kvm))
1932                 gmap_remove(vcpu->arch.gmap);
1933
1934         if (vcpu->kvm->arch.use_cmma)
1935                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1936         free_page((unsigned long)(vcpu->arch.sie_block));
1937
1938         kvm_vcpu_uninit(vcpu);
1939         kmem_cache_free(kvm_vcpu_cache, vcpu);
1940 }
1941
1942 static void kvm_free_vcpus(struct kvm *kvm)
1943 {
1944         unsigned int i;
1945         struct kvm_vcpu *vcpu;
1946
1947         kvm_for_each_vcpu(i, vcpu, kvm)
1948                 kvm_arch_vcpu_destroy(vcpu);
1949
1950         mutex_lock(&kvm->lock);
1951         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1952                 kvm->vcpus[i] = NULL;
1953
1954         atomic_set(&kvm->online_vcpus, 0);
1955         mutex_unlock(&kvm->lock);
1956 }
1957
1958 void kvm_arch_destroy_vm(struct kvm *kvm)
1959 {
1960         kvm_free_vcpus(kvm);
1961         sca_dispose(kvm);
1962         debug_unregister(kvm->arch.dbf);
1963         free_page((unsigned long)kvm->arch.sie_page2);
1964         if (!kvm_is_ucontrol(kvm))
1965                 gmap_remove(kvm->arch.gmap);
1966         kvm_s390_destroy_adapters(kvm);
1967         kvm_s390_clear_float_irqs(kvm);
1968         kvm_s390_vsie_destroy(kvm);
1969         if (kvm->arch.migration_state) {
1970                 vfree(kvm->arch.migration_state->pgste_bitmap);
1971                 kfree(kvm->arch.migration_state);
1972         }
1973         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1974 }
1975
1976 /* Section: vcpu related */
1977 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1978 {
1979         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1980         if (!vcpu->arch.gmap)
1981                 return -ENOMEM;
1982         vcpu->arch.gmap->private = vcpu->kvm;
1983
1984         return 0;
1985 }
1986
1987 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1988 {
1989         if (!kvm_s390_use_sca_entries())
1990                 return;
1991         read_lock(&vcpu->kvm->arch.sca_lock);
1992         if (vcpu->kvm->arch.use_esca) {
1993                 struct esca_block *sca = vcpu->kvm->arch.sca;
1994
1995                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1996                 sca->cpu[vcpu->vcpu_id].sda = 0;
1997         } else {
1998                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1999
2000                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2001                 sca->cpu[vcpu->vcpu_id].sda = 0;
2002         }
2003         read_unlock(&vcpu->kvm->arch.sca_lock);
2004 }
2005
2006 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2007 {
2008         if (!kvm_s390_use_sca_entries()) {
2009                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2010
2011                 /* we still need the basic sca for the ipte control */
2012                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2013                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2014         }
2015         read_lock(&vcpu->kvm->arch.sca_lock);
2016         if (vcpu->kvm->arch.use_esca) {
2017                 struct esca_block *sca = vcpu->kvm->arch.sca;
2018
2019                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2020                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2021                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2022                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2023                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2024         } else {
2025                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2026
2027                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2028                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2029                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2030                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2031         }
2032         read_unlock(&vcpu->kvm->arch.sca_lock);
2033 }
2034
2035 /* Basic SCA to Extended SCA data copy routines */
2036 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2037 {
2038         d->sda = s->sda;
2039         d->sigp_ctrl.c = s->sigp_ctrl.c;
2040         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2041 }
2042
2043 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2044 {
2045         int i;
2046
2047         d->ipte_control = s->ipte_control;
2048         d->mcn[0] = s->mcn;
2049         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2050                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2051 }
2052
2053 static int sca_switch_to_extended(struct kvm *kvm)
2054 {
2055         struct bsca_block *old_sca = kvm->arch.sca;
2056         struct esca_block *new_sca;
2057         struct kvm_vcpu *vcpu;
2058         unsigned int vcpu_idx;
2059         u32 scaol, scaoh;
2060
2061         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2062         if (!new_sca)
2063                 return -ENOMEM;
2064
2065         scaoh = (u32)((u64)(new_sca) >> 32);
2066         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2067
2068         kvm_s390_vcpu_block_all(kvm);
2069         write_lock(&kvm->arch.sca_lock);
2070
2071         sca_copy_b_to_e(new_sca, old_sca);
2072
2073         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2074                 vcpu->arch.sie_block->scaoh = scaoh;
2075                 vcpu->arch.sie_block->scaol = scaol;
2076                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2077         }
2078         kvm->arch.sca = new_sca;
2079         kvm->arch.use_esca = 1;
2080
2081         write_unlock(&kvm->arch.sca_lock);
2082         kvm_s390_vcpu_unblock_all(kvm);
2083
2084         free_page((unsigned long)old_sca);
2085
2086         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2087                  old_sca, kvm->arch.sca);
2088         return 0;
2089 }
2090
2091 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2092 {
2093         int rc;
2094
2095         if (!kvm_s390_use_sca_entries()) {
2096                 if (id < KVM_MAX_VCPUS)
2097                         return true;
2098                 return false;
2099         }
2100         if (id < KVM_S390_BSCA_CPU_SLOTS)
2101                 return true;
2102         if (!sclp.has_esca || !sclp.has_64bscao)
2103                 return false;
2104
2105         mutex_lock(&kvm->lock);
2106         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2107         mutex_unlock(&kvm->lock);
2108
2109         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2110 }
2111
2112 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2113 {
2114         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2115         kvm_clear_async_pf_completion_queue(vcpu);
2116         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2117                                     KVM_SYNC_GPRS |
2118                                     KVM_SYNC_ACRS |
2119                                     KVM_SYNC_CRS |
2120                                     KVM_SYNC_ARCH0 |
2121                                     KVM_SYNC_PFAULT;
2122         kvm_s390_set_prefix(vcpu, 0);
2123         if (test_kvm_facility(vcpu->kvm, 64))
2124                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2125         if (test_kvm_facility(vcpu->kvm, 133))
2126                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2127         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2128          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2129          */
2130         if (MACHINE_HAS_VX)
2131                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2132         else
2133                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2134
2135         if (kvm_is_ucontrol(vcpu->kvm))
2136                 return __kvm_ucontrol_vcpu_init(vcpu);
2137
2138         return 0;
2139 }
2140
2141 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2142 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2143 {
2144         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2145         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2146         vcpu->arch.cputm_start = get_tod_clock_fast();
2147         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2148 }
2149
2150 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2151 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2152 {
2153         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2154         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2155         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2156         vcpu->arch.cputm_start = 0;
2157         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2158 }
2159
2160 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2161 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2162 {
2163         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2164         vcpu->arch.cputm_enabled = true;
2165         __start_cpu_timer_accounting(vcpu);
2166 }
2167
2168 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2169 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2170 {
2171         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2172         __stop_cpu_timer_accounting(vcpu);
2173         vcpu->arch.cputm_enabled = false;
2174 }
2175
2176 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2177 {
2178         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2179         __enable_cpu_timer_accounting(vcpu);
2180         preempt_enable();
2181 }
2182
2183 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2184 {
2185         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2186         __disable_cpu_timer_accounting(vcpu);
2187         preempt_enable();
2188 }
2189
2190 /* set the cpu timer - may only be called from the VCPU thread itself */
2191 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2192 {
2193         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2194         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2195         if (vcpu->arch.cputm_enabled)
2196                 vcpu->arch.cputm_start = get_tod_clock_fast();
2197         vcpu->arch.sie_block->cputm = cputm;
2198         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2199         preempt_enable();
2200 }
2201
2202 /* update and get the cpu timer - can also be called from other VCPU threads */
2203 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2204 {
2205         unsigned int seq;
2206         __u64 value;
2207
2208         if (unlikely(!vcpu->arch.cputm_enabled))
2209                 return vcpu->arch.sie_block->cputm;
2210
2211         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2212         do {
2213                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2214                 /*
2215                  * If the writer would ever execute a read in the critical
2216                  * section, e.g. in irq context, we have a deadlock.
2217                  */
2218                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2219                 value = vcpu->arch.sie_block->cputm;
2220                 /* if cputm_start is 0, accounting is being started/stopped */
2221                 if (likely(vcpu->arch.cputm_start))
2222                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2223         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2224         preempt_enable();
2225         return value;
2226 }
2227
2228 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2229 {
2230
2231         gmap_enable(vcpu->arch.enabled_gmap);
2232         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2233         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2234                 __start_cpu_timer_accounting(vcpu);
2235         vcpu->cpu = cpu;
2236 }
2237
2238 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2239 {
2240         vcpu->cpu = -1;
2241         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2242                 __stop_cpu_timer_accounting(vcpu);
2243         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2244         vcpu->arch.enabled_gmap = gmap_get_enabled();
2245         gmap_disable(vcpu->arch.enabled_gmap);
2246
2247 }
2248
2249 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2250 {
2251         /* this equals initial cpu reset in POP, but we don't switch to ESA */
2252         vcpu->arch.sie_block->gpsw.mask = 0UL;
2253         vcpu->arch.sie_block->gpsw.addr = 0UL;
2254         kvm_s390_set_prefix(vcpu, 0);
2255         kvm_s390_set_cpu_timer(vcpu, 0);
2256         vcpu->arch.sie_block->ckc       = 0UL;
2257         vcpu->arch.sie_block->todpr     = 0;
2258         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2259         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2260         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2261         /* make sure the new fpc will be lazily loaded */
2262         save_fpu_regs();
2263         current->thread.fpu.fpc = 0;
2264         vcpu->arch.sie_block->gbea = 1;
2265         vcpu->arch.sie_block->pp = 0;
2266         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2267         kvm_clear_async_pf_completion_queue(vcpu);
2268         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2269                 kvm_s390_vcpu_stop(vcpu);
2270         kvm_s390_clear_local_irqs(vcpu);
2271 }
2272
2273 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2274 {
2275         mutex_lock(&vcpu->kvm->lock);
2276         preempt_disable();
2277         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2278         preempt_enable();
2279         mutex_unlock(&vcpu->kvm->lock);
2280         if (!kvm_is_ucontrol(vcpu->kvm)) {
2281                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2282                 sca_add_vcpu(vcpu);
2283         }
2284         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2285                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2286         /* make vcpu_load load the right gmap on the first trigger */
2287         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2288 }
2289
2290 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2291 {
2292         if (!test_kvm_facility(vcpu->kvm, 76))
2293                 return;
2294
2295         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2296
2297         if (vcpu->kvm->arch.crypto.aes_kw)
2298                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2299         if (vcpu->kvm->arch.crypto.dea_kw)
2300                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2301
2302         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2303 }
2304
2305 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2306 {
2307         free_page(vcpu->arch.sie_block->cbrlo);
2308         vcpu->arch.sie_block->cbrlo = 0;
2309 }
2310
2311 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2312 {
2313         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2314         if (!vcpu->arch.sie_block->cbrlo)
2315                 return -ENOMEM;
2316
2317         vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2318         return 0;
2319 }
2320
2321 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2322 {
2323         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2324
2325         vcpu->arch.sie_block->ibc = model->ibc;
2326         if (test_kvm_facility(vcpu->kvm, 7))
2327                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2328 }
2329
2330 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2331 {
2332         int rc = 0;
2333
2334         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2335                                                     CPUSTAT_SM |
2336                                                     CPUSTAT_STOPPED);
2337
2338         if (test_kvm_facility(vcpu->kvm, 78))
2339                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2340         else if (test_kvm_facility(vcpu->kvm, 8))
2341                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2342
2343         kvm_s390_vcpu_setup_model(vcpu);
2344
2345         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2346         if (MACHINE_HAS_ESOP)
2347                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2348         if (test_kvm_facility(vcpu->kvm, 9))
2349                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2350         if (test_kvm_facility(vcpu->kvm, 73))
2351                 vcpu->arch.sie_block->ecb |= ECB_TE;
2352
2353         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2354                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2355         if (test_kvm_facility(vcpu->kvm, 130))
2356                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2357         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2358         if (sclp.has_cei)
2359                 vcpu->arch.sie_block->eca |= ECA_CEI;
2360         if (sclp.has_ib)
2361                 vcpu->arch.sie_block->eca |= ECA_IB;
2362         if (sclp.has_siif)
2363                 vcpu->arch.sie_block->eca |= ECA_SII;
2364         if (sclp.has_sigpif)
2365                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2366         if (test_kvm_facility(vcpu->kvm, 129)) {
2367                 vcpu->arch.sie_block->eca |= ECA_VX;
2368                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2369         }
2370         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2371                                         | SDNXC;
2372         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2373
2374         if (sclp.has_kss)
2375                 atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2376         else
2377                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2378
2379         if (vcpu->kvm->arch.use_cmma) {
2380                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2381                 if (rc)
2382                         return rc;
2383         }
2384         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2385         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2386
2387         kvm_s390_vcpu_crypto_setup(vcpu);
2388
2389         return rc;
2390 }
2391
2392 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2393                                       unsigned int id)
2394 {
2395         struct kvm_vcpu *vcpu;
2396         struct sie_page *sie_page;
2397         int rc = -EINVAL;
2398
2399         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2400                 goto out;
2401
2402         rc = -ENOMEM;
2403
2404         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2405         if (!vcpu)
2406                 goto out;
2407
2408         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2409         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2410         if (!sie_page)
2411                 goto out_free_cpu;
2412
2413         vcpu->arch.sie_block = &sie_page->sie_block;
2414         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2415
2416         /* the real guest size will always be smaller than msl */
2417         vcpu->arch.sie_block->mso = 0;
2418         vcpu->arch.sie_block->msl = sclp.hamax;
2419
2420         vcpu->arch.sie_block->icpua = id;
2421         spin_lock_init(&vcpu->arch.local_int.lock);
2422         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2423         vcpu->arch.local_int.wq = &vcpu->wq;
2424         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2425         seqcount_init(&vcpu->arch.cputm_seqcount);
2426
2427         rc = kvm_vcpu_init(vcpu, kvm, id);
2428         if (rc)
2429                 goto out_free_sie_block;
2430         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2431                  vcpu->arch.sie_block);
2432         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2433
2434         return vcpu;
2435 out_free_sie_block:
2436         free_page((unsigned long)(vcpu->arch.sie_block));
2437 out_free_cpu:
2438         kmem_cache_free(kvm_vcpu_cache, vcpu);
2439 out:
2440         return ERR_PTR(rc);
2441 }
2442
2443 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2444 {
2445         return kvm_s390_vcpu_has_irq(vcpu, 0);
2446 }
2447
2448 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2449 {
2450         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2451         exit_sie(vcpu);
2452 }
2453
2454 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2455 {
2456         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2457 }
2458
2459 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2460 {
2461         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2462         exit_sie(vcpu);
2463 }
2464
2465 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2466 {
2467         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2468 }
2469
2470 /*
2471  * Kick a guest cpu out of SIE and wait until SIE is not running.
2472  * If the CPU is not running (e.g. waiting as idle) the function will
2473  * return immediately. */
2474 void exit_sie(struct kvm_vcpu *vcpu)
2475 {
2476         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2477         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2478                 cpu_relax();
2479 }
2480
2481 /* Kick a guest cpu out of SIE to process a request synchronously */
2482 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2483 {
2484         kvm_make_request(req, vcpu);
2485         kvm_s390_vcpu_request(vcpu);
2486 }
2487
2488 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2489                               unsigned long end)
2490 {
2491         struct kvm *kvm = gmap->private;
2492         struct kvm_vcpu *vcpu;
2493         unsigned long prefix;
2494         int i;
2495
2496         if (gmap_is_shadow(gmap))
2497                 return;
2498         if (start >= 1UL << 31)
2499                 /* We are only interested in prefix pages */
2500                 return;
2501         kvm_for_each_vcpu(i, vcpu, kvm) {
2502                 /* match against both prefix pages */
2503                 prefix = kvm_s390_get_prefix(vcpu);
2504                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2505                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2506                                    start, end);
2507                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2508                 }
2509         }
2510 }
2511
2512 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2513 {
2514         /* kvm common code refers to this, but never calls it */
2515         BUG();
2516         return 0;
2517 }
2518
2519 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2520                                            struct kvm_one_reg *reg)
2521 {
2522         int r = -EINVAL;
2523
2524         switch (reg->id) {
2525         case KVM_REG_S390_TODPR:
2526                 r = put_user(vcpu->arch.sie_block->todpr,
2527                              (u32 __user *)reg->addr);
2528                 break;
2529         case KVM_REG_S390_EPOCHDIFF:
2530                 r = put_user(vcpu->arch.sie_block->epoch,
2531                              (u64 __user *)reg->addr);
2532                 break;
2533         case KVM_REG_S390_CPU_TIMER:
2534                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2535                              (u64 __user *)reg->addr);
2536                 break;
2537         case KVM_REG_S390_CLOCK_COMP:
2538                 r = put_user(vcpu->arch.sie_block->ckc,
2539                              (u64 __user *)reg->addr);
2540                 break;
2541         case KVM_REG_S390_PFTOKEN:
2542                 r = put_user(vcpu->arch.pfault_token,
2543                              (u64 __user *)reg->addr);
2544                 break;
2545         case KVM_REG_S390_PFCOMPARE:
2546                 r = put_user(vcpu->arch.pfault_compare,
2547                              (u64 __user *)reg->addr);
2548                 break;
2549         case KVM_REG_S390_PFSELECT:
2550                 r = put_user(vcpu->arch.pfault_select,
2551                              (u64 __user *)reg->addr);
2552                 break;
2553         case KVM_REG_S390_PP:
2554                 r = put_user(vcpu->arch.sie_block->pp,
2555                              (u64 __user *)reg->addr);
2556                 break;
2557         case KVM_REG_S390_GBEA:
2558                 r = put_user(vcpu->arch.sie_block->gbea,
2559                              (u64 __user *)reg->addr);
2560                 break;
2561         default:
2562                 break;
2563         }
2564
2565         return r;
2566 }
2567
2568 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2569                                            struct kvm_one_reg *reg)
2570 {
2571         int r = -EINVAL;
2572         __u64 val;
2573
2574         switch (reg->id) {
2575         case KVM_REG_S390_TODPR:
2576                 r = get_user(vcpu->arch.sie_block->todpr,
2577                              (u32 __user *)reg->addr);
2578                 break;
2579         case KVM_REG_S390_EPOCHDIFF:
2580                 r = get_user(vcpu->arch.sie_block->epoch,
2581                              (u64 __user *)reg->addr);
2582                 break;
2583         case KVM_REG_S390_CPU_TIMER:
2584                 r = get_user(val, (u64 __user *)reg->addr);
2585                 if (!r)
2586                         kvm_s390_set_cpu_timer(vcpu, val);
2587                 break;
2588         case KVM_REG_S390_CLOCK_COMP:
2589                 r = get_user(vcpu->arch.sie_block->ckc,
2590                              (u64 __user *)reg->addr);
2591                 break;
2592         case KVM_REG_S390_PFTOKEN:
2593                 r = get_user(vcpu->arch.pfault_token,
2594                              (u64 __user *)reg->addr);
2595                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2596                         kvm_clear_async_pf_completion_queue(vcpu);
2597                 break;
2598         case KVM_REG_S390_PFCOMPARE:
2599                 r = get_user(vcpu->arch.pfault_compare,
2600                              (u64 __user *)reg->addr);
2601                 break;
2602         case KVM_REG_S390_PFSELECT:
2603                 r = get_user(vcpu->arch.pfault_select,
2604                              (u64 __user *)reg->addr);
2605                 break;
2606         case KVM_REG_S390_PP:
2607                 r = get_user(vcpu->arch.sie_block->pp,
2608                              (u64 __user *)reg->addr);
2609                 break;
2610         case KVM_REG_S390_GBEA:
2611                 r = get_user(vcpu->arch.sie_block->gbea,
2612                              (u64 __user *)reg->addr);
2613                 break;
2614         default:
2615                 break;
2616         }
2617
2618         return r;
2619 }
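
/*
 * Editorial sketch (not part of the original file): userspace reaches the
 * two handlers above via the generic ONE_REG vcpu ioctls, e.g. to read the
 * CPU timer (vcpu_fd is an illustrative assumption):
 *
 *	__u64 cputm;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)(unsigned long)&cputm,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */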
2620
2621 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2622 {
2623         kvm_s390_vcpu_initial_reset(vcpu);
2624         return 0;
2625 }
2626
2627 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2628 {
2629         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2630         return 0;
2631 }
2632
2633 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2634 {
2635         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2636         return 0;
2637 }
2638
2639 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2640                                   struct kvm_sregs *sregs)
2641 {
2642         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2643         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2644         return 0;
2645 }
2646
2647 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2648                                   struct kvm_sregs *sregs)
2649 {
2650         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2651         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2652         return 0;
2653 }
2654
2655 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2656 {
2657         if (test_fp_ctl(fpu->fpc))
2658                 return -EINVAL;
2659         vcpu->run->s.regs.fpc = fpu->fpc;
2660         if (MACHINE_HAS_VX)
2661                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2662                                  (freg_t *) fpu->fprs);
2663         else
2664                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2665         return 0;
2666 }
2667
2668 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2669 {
2670         /* make sure we have the latest values */
2671         save_fpu_regs();
2672         if (MACHINE_HAS_VX)
2673                 convert_vx_to_fp((freg_t *) fpu->fprs,
2674                                  (__vector128 *) vcpu->run->s.regs.vrs);
2675         else
2676                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2677         fpu->fpc = vcpu->run->s.regs.fpc;
2678         return 0;
2679 }
2680
2681 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2682 {
2683         int rc = 0;
2684
2685         if (!is_vcpu_stopped(vcpu))
2686                 rc = -EBUSY;
2687         else {
2688                 vcpu->run->psw_mask = psw.mask;
2689                 vcpu->run->psw_addr = psw.addr;
2690         }
2691         return rc;
2692 }
2693
2694 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2695                                   struct kvm_translation *tr)
2696 {
2697         return -EINVAL; /* not implemented yet */
2698 }
2699
2700 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2701                               KVM_GUESTDBG_USE_HW_BP | \
2702                               KVM_GUESTDBG_ENABLE)
2703
2704 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2705                                         struct kvm_guest_debug *dbg)
2706 {
2707         int rc = 0;
2708
2709         vcpu->guest_debug = 0;
2710         kvm_s390_clear_bp_data(vcpu);
2711
2712         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2713                 return -EINVAL;
2714         if (!sclp.has_gpere)
2715                 return -EINVAL;
2716
2717         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2718                 vcpu->guest_debug = dbg->control;
2719                 /* enforce guest PER */
2720                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2721
2722                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2723                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2724         } else {
2725                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2726                 vcpu->arch.guestdbg.last_bp = 0;
2727         }
2728
2729         if (rc) {
2730                 vcpu->guest_debug = 0;
2731                 kvm_s390_clear_bp_data(vcpu);
2732                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2733         }
2734
2735         return rc;
2736 }
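
/*
 * Editorial sketch (not part of the original file): enabling single-step
 * debugging from userspace; KVM_GUESTDBG_ENABLE must be set for the other
 * flags to take effect, as validated above (vcpu_fd is illustrative):
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
 */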
2737
2738 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2739                                     struct kvm_mp_state *mp_state)
2740 {
2741         /* CHECK_STOP and LOAD are not supported yet */
2742         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2743                                        KVM_MP_STATE_OPERATING;
2744 }
2745
2746 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2747                                     struct kvm_mp_state *mp_state)
2748 {
2749         int rc = 0;
2750
2751         /* user space knows about this interface - let it control the state */
2752         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2753
2754         switch (mp_state->mp_state) {
2755         case KVM_MP_STATE_STOPPED:
2756                 kvm_s390_vcpu_stop(vcpu);
2757                 break;
2758         case KVM_MP_STATE_OPERATING:
2759                 kvm_s390_vcpu_start(vcpu);
2760                 break;
2761         case KVM_MP_STATE_LOAD:
2762         case KVM_MP_STATE_CHECK_STOP:
2763                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2764         default:
2765                 rc = -ENXIO;
2766         }
2767
2768         return rc;
2769 }
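
/*
 * Editorial sketch (not part of the original file): stopping a VCPU via
 * KVM_SET_MP_STATE, which, as noted above, also hands CPU state control to
 * userspace (vcpu_fd is illustrative):
 *
 *	struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
 *
 *	ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);
 */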
2770
2771 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2772 {
2773         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2774 }
2775
2776 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2777 {
2778 retry:
2779         kvm_s390_vcpu_request_handled(vcpu);
2780         if (!vcpu->requests)
2781                 return 0;
2782         /*
2783          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2784          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2785          * This ensures that the ipte instruction for this request has
2786          * already finished. We might race against a second unmapper that
2787          * wants to set the blocking bit. Let's just retry the request loop.
2788          */
2789         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2790                 int rc;
2791                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2792                                           kvm_s390_get_prefix(vcpu),
2793                                           PAGE_SIZE * 2, PROT_WRITE);
2794                 if (rc) {
2795                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2796                         return rc;
2797                 }
2798                 goto retry;
2799         }
2800
2801         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2802                 vcpu->arch.sie_block->ihcpu = 0xffff;
2803                 goto retry;
2804         }
2805
2806         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2807                 if (!ibs_enabled(vcpu)) {
2808                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2809                         atomic_or(CPUSTAT_IBS,
2810                                         &vcpu->arch.sie_block->cpuflags);
2811                 }
2812                 goto retry;
2813         }
2814
2815         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2816                 if (ibs_enabled(vcpu)) {
2817                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2818                         atomic_andnot(CPUSTAT_IBS,
2819                                           &vcpu->arch.sie_block->cpuflags);
2820                 }
2821                 goto retry;
2822         }
2823
2824         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2825                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2826                 goto retry;
2827         }
2828
2829         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2830                 /*
2831                  * Disable CMMA virtualization; we will emulate the ESSA
2832                  * instruction manually, in order to provide additional
2833                  * functionalities needed for live migration.
2834                  */
2835                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2836                 goto retry;
2837         }
2838
2839         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2840                 /*
2841                  * Re-enable CMMA virtualization if CMMA is available and
2842                  * was used.
2843                  */
2844                 if ((vcpu->kvm->arch.use_cmma) &&
2845                     (vcpu->kvm->mm->context.use_cmma))
2846                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2847                 goto retry;
2848         }
2849
2850         /* nothing to do, just clear the request */
2851         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2852
2853         return 0;
2854 }
2855
2856 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2857 {
2858         struct kvm_vcpu *vcpu;
2859         int i;
2860
2861         mutex_lock(&kvm->lock);
2862         preempt_disable();
2863         kvm->arch.epoch = tod - get_tod_clock();
2864         kvm_s390_vcpu_block_all(kvm);
2865         kvm_for_each_vcpu(i, vcpu, kvm)
2866                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2867         kvm_s390_vcpu_unblock_all(kvm);
2868         preempt_enable();
2869         mutex_unlock(&kvm->lock);
2870 }
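
/*
 * Editorial sketch (not part of the original file): userspace reaches
 * kvm_s390_set_tod_clock() through the KVM_S390_VM_TOD attribute group
 * (vm_fd and the tod value are illustrative):
 *
 *	__u64 tod = 0;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)(unsigned long)&tod,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */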
2871
2872 /**
2873  * kvm_arch_fault_in_page - fault-in guest page if necessary
2874  * @vcpu: The corresponding virtual cpu
2875  * @gpa: Guest physical address
2876  * @writable: Whether the page should be writable or not
2877  *
2878  * Make sure that a guest page has been faulted-in on the host.
2879  *
2880  * Return: Zero on success, negative error code otherwise.
2881  */
2882 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2883 {
2884         return gmap_fault(vcpu->arch.gmap, gpa,
2885                           writable ? FAULT_FLAG_WRITE : 0);
2886 }
2887
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly, but we still
	 * want kvm_check_async_pf_completion() to clean up.
	 */
	return true;
}

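/*
 * Decide whether a host fault may be handled asynchronously via the
 * pfault mechanism. The guards below check that the guest has set up
 * pfault handling (a valid token, configured via DIAGNOSE 0x258 and
 * synced through KVM_SYNC_PFAULT), that the PSW mask matches the
 * guest-selected compare value, and that external interrupts with the
 * service-signal subclass (CR0 bit 0x200) can currently be delivered.
 */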
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390, notifications for arriving pages are delivered directly
	 * to the guest, but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction, as DAT exceptions are nullifying. So we have to look
	 * up the current opcode to get the length of the instruction to be
	 * able to forward the PSW.
	 */
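	/*
	 * insn_length() derives the instruction length (2, 4 or 6 bytes)
	 * from the two leftmost bits of the opcode, so fetching a single
	 * byte is sufficient.
	 */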
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

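/*
 * The inner run loop: deliver pending interrupts and requests, enter
 * SIE via sie64a(), then let vcpu_post_run() sort out the result.
 * sie64a() returns zero for a regular exit (the reason is then found
 * in the SIE block's icptcode) and a negative value such as -EFAULT
 * when the host could not resolve a guest access.
 */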
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other structures) are
	 * protected.
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in the fault handler, there must
		 * be no uaccess between guest_enter and guest_exit.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

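/*
 * Transfer the register state that userspace marked dirty in kvm_run
 * into the SIE block and the host thread before entering the guest.
 * Lazy enablement of runtime instrumentation and guarded storage also
 * happens here: a valid control block synced from userspace (e.g.
 * after migration) implies the guest already uses the feature.
 */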
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of relying on lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->valid &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of relying on lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}

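/*
 * Counterpart of sync_regs(): copy the guest register state back into
 * kvm_run and restore the host (userspace) register context.
 */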
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	/* Reject stopped VCPUs before the signal mask is replaced. */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

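	/*
	 * From here on, gpa is the base of a save area laid out like the
	 * lowcore: each write below adds one of the __LC_*_SAVE_AREA
	 * offsets (which is why the generic case above subtracted
	 * __LC_FPREGS_SAVE_AREA, the first of these offsets).
	 */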
	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

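/*
 * The IBS facility speeds up a guest that has only a single running
 * CPU. kvm_s390_vcpu_start() and kvm_s390_vcpu_stop() below therefore
 * enable IBS only while exactly one VCPU is started, and disable it on
 * all VCPUs as soon as a second one is started.
 */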
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

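/*
 * Backend of the KVM_S390_MEM_OP vcpu ioctl. A userspace caller would
 * fill in struct kvm_s390_mem_op roughly as follows (illustrative
 * sketch only, not taken from this file):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.gaddr = guest_addr,              // logical guest address
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.size  = length,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.ar    = 0,                       // access register number
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */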
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
	    && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment boundary (1 MB). The memory in userland may be fragmented
	   across different vmas, and it is fine to mmap() and munmap() in
	   this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
}

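/*
 * Compute, for facility-list doubleword i, a mask of the facility bits
 * that may be passed through to guests. sclp.hmfai appears to pack one
 * 2-bit value per doubleword; judging by the shift arithmetic below,
 * each increment hides another 16 facility bits from the top of the
 * doubleword, and the uppermost 16 bits are never passed through.
 */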
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");