KVM: s390: Clear Crypto Control Block when using vSIE
arch/s390/kvm/kvm-s390.c (linux-2.6-microblaze.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #include <linux/compiler.h>
15 #include <linux/err.h>
16 #include <linux/fs.h>
17 #include <linux/hrtimer.h>
18 #include <linux/init.h>
19 #include <linux/kvm.h>
20 #include <linux/kvm_host.h>
21 #include <linux/mman.h>
22 #include <linux/module.h>
23 #include <linux/moduleparam.h>
24 #include <linux/random.h>
25 #include <linux/slab.h>
26 #include <linux/timer.h>
27 #include <linux/vmalloc.h>
28 #include <linux/bitmap.h>
29 #include <linux/sched/signal.h>
30 #include <linux/string.h>
31
32 #include <asm/asm-offsets.h>
33 #include <asm/lowcore.h>
34 #include <asm/stp.h>
35 #include <asm/pgtable.h>
36 #include <asm/gmap.h>
37 #include <asm/nmi.h>
38 #include <asm/switch_to.h>
39 #include <asm/isc.h>
40 #include <asm/sclp.h>
41 #include <asm/cpacf.h>
42 #include <asm/timex.h>
43 #include <asm/ap.h>
44 #include "kvm-s390.h"
45 #include "gaccess.h"
46
47 #define KMSG_COMPONENT "kvm-s390"
48 #undef pr_fmt
49 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
50
51 #define CREATE_TRACE_POINTS
52 #include "trace.h"
53 #include "trace-s390.h"
54
55 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
56 #define LOCAL_IRQS 32
57 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
58                            (KVM_MAX_VCPUS + LOCAL_IRQS))
59
60 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
61 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
62
63 struct kvm_stats_debugfs_item debugfs_entries[] = {
64         { "userspace_handled", VCPU_STAT(exit_userspace) },
65         { "exit_null", VCPU_STAT(exit_null) },
66         { "exit_validity", VCPU_STAT(exit_validity) },
67         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
68         { "exit_external_request", VCPU_STAT(exit_external_request) },
69         { "exit_io_request", VCPU_STAT(exit_io_request) },
70         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71         { "exit_instruction", VCPU_STAT(exit_instruction) },
72         { "exit_pei", VCPU_STAT(exit_pei) },
73         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93         { "deliver_program", VCPU_STAT(deliver_program) },
94         { "deliver_io", VCPU_STAT(deliver_io) },
95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97         { "inject_ckc", VCPU_STAT(inject_ckc) },
98         { "inject_cputm", VCPU_STAT(inject_cputm) },
99         { "inject_external_call", VCPU_STAT(inject_external_call) },
100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102         { "inject_io", VM_STAT(inject_io) },
103         { "inject_mchk", VCPU_STAT(inject_mchk) },
104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105         { "inject_program", VCPU_STAT(inject_program) },
106         { "inject_restart", VCPU_STAT(inject_restart) },
107         { "inject_service_signal", VM_STAT(inject_service_signal) },
108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111         { "inject_virtio", VM_STAT(inject_virtio) },
112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113         { "instruction_gs", VCPU_STAT(instruction_gs) },
114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120         { "instruction_sck", VCPU_STAT(instruction_sck) },
121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122         { "instruction_spx", VCPU_STAT(instruction_spx) },
123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124         { "instruction_stap", VCPU_STAT(instruction_stap) },
125         { "instruction_iske", VCPU_STAT(instruction_iske) },
126         { "instruction_ri", VCPU_STAT(instruction_ri) },
127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128         { "instruction_sske", VCPU_STAT(instruction_sske) },
129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130         { "instruction_essa", VCPU_STAT(instruction_essa) },
131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133         { "instruction_tb", VCPU_STAT(instruction_tb) },
134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138         { "instruction_sie", VCPU_STAT(instruction_sie) },
139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
159         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
160         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
161         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
162         { NULL }
163 };
164
165 struct kvm_s390_tod_clock_ext {
166         __u8 epoch_idx;
167         __u64 tod;
168         __u8 reserved[7];
169 } __packed;
170
171 /* allow nested virtualization in KVM (if enabled by user space) */
172 static int nested;
173 module_param(nested, int, S_IRUGO);
174 MODULE_PARM_DESC(nested, "Nested virtualization support");
175
176 /* allow 1m huge page guest backing, if !nested */
177 static int hpage;
178 module_param(hpage, int, 0444);
179 MODULE_PARM_DESC(hpage, "1m huge page backing support");
180
181 /*
182  * For now we handle at most 16 double words as this is what the s390 base
183  * kernel handles and stores in the prefix page. If we ever need to go beyond
184  * this, it requires code changes, but the external uapi can stay.
185  */
186 #define SIZE_INTERNAL 16
187
188 /*
189  * Base feature mask that defines default mask for facilities. Consists of the
190  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
191  */
192 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
193 /*
194  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
195  * and defines the facilities that can be enabled via a cpu model.
196  */
197 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
198
199 static unsigned long kvm_s390_fac_size(void)
200 {
201         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
202         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
203         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
204                 sizeof(S390_lowcore.stfle_fac_list));
205
206         return SIZE_INTERNAL;
207 }
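/*
 * Note: the BUILD_BUG_ONs above guarantee that the internal 16-doubleword
 * facility arrays never outgrow the architectural mask/list sizes nor the
 * stfle facility list in the lowcore that they are combined with.
 */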
208
209 /* available cpu features supported by kvm */
210 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
211 /* available subfunctions indicated via query / "test bit" */
212 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
213
214 static struct gmap_notifier gmap_notifier;
215 static struct gmap_notifier vsie_gmap_notifier;
216 debug_info_t *kvm_s390_dbf;
217
218 /* Section: not file related */
219 int kvm_arch_hardware_enable(void)
220 {
221         /* every s390 is virtualization enabled ;-) */
222         return 0;
223 }
224
225 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
226                               unsigned long end);
227
228 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
229 {
230         u8 delta_idx = 0;
231
232         /*
233          * The TOD jumps by delta; we have to compensate for this by adding
234          * -delta to the epoch.
235          */
236         delta = -delta;
237
238         /* sign-extension - we're adding to signed values below */
239         if ((s64)delta < 0)
240                 delta_idx = -1;
241
242         scb->epoch += delta;
243         if (scb->ecd & ECD_MEF) {
244                 scb->epdx += delta_idx;
245                 if (scb->epoch < delta)
246                         scb->epdx += 1;
247         }
248 }
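/*
 * Together with the sign extension above this amounts to a 128-bit addition
 * of -delta to the (epdx:epoch) pair: e.g. for a host TOD jump of +5, epoch
 * gets -5 (0xffff...fffb) added and epdx gets -1 added, and the final
 * "epoch < delta" test adds back the carry out of the low 64-bit addition,
 * so epdx only really changes when the epoch wraps around.
 */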
249
250 /*
251  * This callback is executed during stop_machine(). All CPUs are therefore
252  * temporarily stopped. In order not to change guest behavior, we have to
253  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
254  * so a CPU won't be stopped while calculating with the epoch.
255  */
256 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
257                           void *v)
258 {
259         struct kvm *kvm;
260         struct kvm_vcpu *vcpu;
261         int i;
262         unsigned long long *delta = v;
263
264         list_for_each_entry(kvm, &vm_list, vm_list) {
265                 kvm_for_each_vcpu(i, vcpu, kvm) {
266                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
267                         if (i == 0) {
268                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
269                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
270                         }
271                         if (vcpu->arch.cputm_enabled)
272                                 vcpu->arch.cputm_start += *delta;
273                         if (vcpu->arch.vsie_block)
274                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
275                                                    *delta);
276                 }
277         }
278         return NOTIFY_OK;
279 }
280
281 static struct notifier_block kvm_clock_notifier = {
282         .notifier_call = kvm_clock_sync,
283 };
284
285 int kvm_arch_hardware_setup(void)
286 {
287         gmap_notifier.notifier_call = kvm_gmap_notifier;
288         gmap_register_pte_notifier(&gmap_notifier);
289         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
290         gmap_register_pte_notifier(&vsie_gmap_notifier);
291         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
292                                        &kvm_clock_notifier);
293         return 0;
294 }
295
296 void kvm_arch_hardware_unsetup(void)
297 {
298         gmap_unregister_pte_notifier(&gmap_notifier);
299         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
300         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
301                                          &kvm_clock_notifier);
302 }
303
304 static void allow_cpu_feat(unsigned long nr)
305 {
306         set_bit_inv(nr, kvm_s390_available_cpu_feat);
307 }
308
309 static inline int plo_test_bit(unsigned char nr)
310 {
311         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
312         int cc;
313
314         asm volatile(
315                 /* Parameter registers are ignored for "test bit" */
316                 "       plo     0,0,0,0(0)\n"
317                 "       ipm     %0\n"
318                 "       srl     %0,28\n"
319                 : "=d" (cc)
320                 : "d" (r0)
321                 : "cc");
322         return cc == 0;
323 }
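/*
 * With bit 0x100 set in GR0, PLO acts as a query ("test bit") for the
 * function code in the low byte and sets cc 0 if that function is installed;
 * the parameter operands are not used. The caller below packs the answers
 * MSB-first (0x80 >> (i & 7)) to match the layout of the CPACF query masks.
 */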
324
325 static void kvm_s390_cpu_feat_init(void)
326 {
327         int i;
328
329         for (i = 0; i < 256; ++i) {
330                 if (plo_test_bit(i))
331                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
332         }
333
334         if (test_facility(28)) /* TOD-clock steering */
335                 ptff(kvm_s390_available_subfunc.ptff,
336                      sizeof(kvm_s390_available_subfunc.ptff),
337                      PTFF_QAF);
338
339         if (test_facility(17)) { /* MSA */
340                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
341                               kvm_s390_available_subfunc.kmac);
342                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
343                               kvm_s390_available_subfunc.kmc);
344                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
345                               kvm_s390_available_subfunc.km);
346                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
347                               kvm_s390_available_subfunc.kimd);
348                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
349                               kvm_s390_available_subfunc.klmd);
350         }
351         if (test_facility(76)) /* MSA3 */
352                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
353                               kvm_s390_available_subfunc.pckmo);
354         if (test_facility(77)) { /* MSA4 */
355                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
356                               kvm_s390_available_subfunc.kmctr);
357                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
358                               kvm_s390_available_subfunc.kmf);
359                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
360                               kvm_s390_available_subfunc.kmo);
361                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
362                               kvm_s390_available_subfunc.pcc);
363         }
364         if (test_facility(57)) /* MSA5 */
365                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
366                               kvm_s390_available_subfunc.ppno);
367
368         if (test_facility(146)) /* MSA8 */
369                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
370                               kvm_s390_available_subfunc.kma);
371
372         if (MACHINE_HAS_ESOP)
373                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
374         /*
375          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
376          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
377          */
378         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
379             !test_facility(3) || !nested)
380                 return;
381         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
382         if (sclp.has_64bscao)
383                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
384         if (sclp.has_siif)
385                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
386         if (sclp.has_gpere)
387                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
388         if (sclp.has_gsls)
389                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
390         if (sclp.has_ib)
391                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
392         if (sclp.has_cei)
393                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
394         if (sclp.has_ibs)
395                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
396         if (sclp.has_kss)
397                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
398         /*
399          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
400          * all skey handling functions read/set the skey from the PGSTE
401          * instead of the real storage key.
402          *
403          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits would cause
404          * pages to be detected as preserved although they are resident.
405          *
406          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
407          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
408          *
409          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
410          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
411          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
412          *
413          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
414          * cannot easily shadow the SCA because of the ipte lock.
415          */
416 }
417
418 int kvm_arch_init(void *opaque)
419 {
420         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
421         if (!kvm_s390_dbf)
422                 return -ENOMEM;
423
424         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
425                 debug_unregister(kvm_s390_dbf);
426                 return -ENOMEM;
427         }
428
429         kvm_s390_cpu_feat_init();
430
431         /* Register floating interrupt controller interface. */
432         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
433 }
434
435 void kvm_arch_exit(void)
436 {
437         debug_unregister(kvm_s390_dbf);
438 }
439
440 /* Section: device related */
441 long kvm_arch_dev_ioctl(struct file *filp,
442                         unsigned int ioctl, unsigned long arg)
443 {
444         if (ioctl == KVM_S390_ENABLE_SIE)
445                 return s390_enable_sie();
446         return -EINVAL;
447 }
448
449 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
450 {
451         int r;
452
453         switch (ext) {
454         case KVM_CAP_S390_PSW:
455         case KVM_CAP_S390_GMAP:
456         case KVM_CAP_SYNC_MMU:
457 #ifdef CONFIG_KVM_S390_UCONTROL
458         case KVM_CAP_S390_UCONTROL:
459 #endif
460         case KVM_CAP_ASYNC_PF:
461         case KVM_CAP_SYNC_REGS:
462         case KVM_CAP_ONE_REG:
463         case KVM_CAP_ENABLE_CAP:
464         case KVM_CAP_S390_CSS_SUPPORT:
465         case KVM_CAP_IOEVENTFD:
466         case KVM_CAP_DEVICE_CTRL:
467         case KVM_CAP_ENABLE_CAP_VM:
468         case KVM_CAP_S390_IRQCHIP:
469         case KVM_CAP_VM_ATTRIBUTES:
470         case KVM_CAP_MP_STATE:
471         case KVM_CAP_IMMEDIATE_EXIT:
472         case KVM_CAP_S390_INJECT_IRQ:
473         case KVM_CAP_S390_USER_SIGP:
474         case KVM_CAP_S390_USER_STSI:
475         case KVM_CAP_S390_SKEYS:
476         case KVM_CAP_S390_IRQ_STATE:
477         case KVM_CAP_S390_USER_INSTR0:
478         case KVM_CAP_S390_CMMA_MIGRATION:
479         case KVM_CAP_S390_AIS:
480         case KVM_CAP_S390_AIS_MIGRATION:
481                 r = 1;
482                 break;
483         case KVM_CAP_S390_HPAGE_1M:
484                 r = 0;
485                 if (hpage)
486                         r = 1;
487                 break;
488         case KVM_CAP_S390_MEM_OP:
489                 r = MEM_OP_MAX_SIZE;
490                 break;
491         case KVM_CAP_NR_VCPUS:
492         case KVM_CAP_MAX_VCPUS:
493                 r = KVM_S390_BSCA_CPU_SLOTS;
494                 if (!kvm_s390_use_sca_entries())
495                         r = KVM_MAX_VCPUS;
496                 else if (sclp.has_esca && sclp.has_64bscao)
497                         r = KVM_S390_ESCA_CPU_SLOTS;
498                 break;
499         case KVM_CAP_NR_MEMSLOTS:
500                 r = KVM_USER_MEM_SLOTS;
501                 break;
502         case KVM_CAP_S390_COW:
503                 r = MACHINE_HAS_ESOP;
504                 break;
505         case KVM_CAP_S390_VECTOR_REGISTERS:
506                 r = MACHINE_HAS_VX;
507                 break;
508         case KVM_CAP_S390_RI:
509                 r = test_facility(64);
510                 break;
511         case KVM_CAP_S390_GS:
512                 r = test_facility(133);
513                 break;
514         case KVM_CAP_S390_BPB:
515                 r = test_facility(82);
516                 break;
517         default:
518                 r = 0;
519         }
520         return r;
521 }
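/*
 * Userspace side (illustrative sketch only, not part of this file): the
 * capabilities handled above are typically probed with the
 * KVM_CHECK_EXTENSION ioctl on /dev/kvm or on the VM fd, e.g.:
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// > 0 means supported; for this cap the value is the maximum
 *	// transfer size for KVM_S390_MEM_OP
 */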
522
523 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
524                                     struct kvm_memory_slot *memslot)
525 {
526         int i;
527         gfn_t cur_gfn, last_gfn;
528         unsigned long gaddr, vmaddr;
529         struct gmap *gmap = kvm->arch.gmap;
530         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
531
532         /* Loop over all guest segments */
533         cur_gfn = memslot->base_gfn;
534         last_gfn = memslot->base_gfn + memslot->npages;
535         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
536                 gaddr = gfn_to_gpa(cur_gfn);
537                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
538                 if (kvm_is_error_hva(vmaddr))
539                         continue;
540
541                 bitmap_zero(bitmap, _PAGE_ENTRIES);
542                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
543                 for (i = 0; i < _PAGE_ENTRIES; i++) {
544                         if (test_bit(i, bitmap))
545                                 mark_page_dirty(kvm, cur_gfn + i);
546                 }
547
548                 if (fatal_signal_pending(current))
549                         return;
550                 cond_resched();
551         }
552 }
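/*
 * The walk above proceeds one segment (_PAGE_ENTRIES 4K pages, i.e. 1 MB)
 * at a time: gmap_sync_dirty_log_pmd() collects the dirty state for that
 * segment into the local bitmap, which is then folded into the generic KVM
 * dirty log via mark_page_dirty().
 */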
553
554 /* Section: vm related */
555 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
556
557 /*
558  * Get (and clear) the dirty memory log for a memory slot.
559  */
560 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
561                                struct kvm_dirty_log *log)
562 {
563         int r;
564         unsigned long n;
565         struct kvm_memslots *slots;
566         struct kvm_memory_slot *memslot;
567         int is_dirty = 0;
568
569         if (kvm_is_ucontrol(kvm))
570                 return -EINVAL;
571
572         mutex_lock(&kvm->slots_lock);
573
574         r = -EINVAL;
575         if (log->slot >= KVM_USER_MEM_SLOTS)
576                 goto out;
577
578         slots = kvm_memslots(kvm);
579         memslot = id_to_memslot(slots, log->slot);
580         r = -ENOENT;
581         if (!memslot->dirty_bitmap)
582                 goto out;
583
584         kvm_s390_sync_dirty_log(kvm, memslot);
585         r = kvm_get_dirty_log(kvm, log, &is_dirty);
586         if (r)
587                 goto out;
588
589         /* Clear the dirty log */
590         if (is_dirty) {
591                 n = kvm_dirty_bitmap_bytes(memslot);
592                 memset(memslot->dirty_bitmap, 0, n);
593         }
594         r = 0;
595 out:
596         mutex_unlock(&kvm->slots_lock);
597         return r;
598 }
599
600 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
601 {
602         unsigned int i;
603         struct kvm_vcpu *vcpu;
604
605         kvm_for_each_vcpu(i, vcpu, kvm) {
606                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
607         }
608 }
609
610 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
611 {
612         int r;
613
614         if (cap->flags)
615                 return -EINVAL;
616
617         switch (cap->cap) {
618         case KVM_CAP_S390_IRQCHIP:
619                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
620                 kvm->arch.use_irqchip = 1;
621                 r = 0;
622                 break;
623         case KVM_CAP_S390_USER_SIGP:
624                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
625                 kvm->arch.user_sigp = 1;
626                 r = 0;
627                 break;
628         case KVM_CAP_S390_VECTOR_REGISTERS:
629                 mutex_lock(&kvm->lock);
630                 if (kvm->created_vcpus) {
631                         r = -EBUSY;
632                 } else if (MACHINE_HAS_VX) {
633                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
634                         set_kvm_facility(kvm->arch.model.fac_list, 129);
635                         if (test_facility(134)) {
636                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
637                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
638                         }
639                         if (test_facility(135)) {
640                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
641                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
642                         }
643                         r = 0;
644                 } else
645                         r = -EINVAL;
646                 mutex_unlock(&kvm->lock);
647                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
648                          r ? "(not available)" : "(success)");
649                 break;
650         case KVM_CAP_S390_RI:
651                 r = -EINVAL;
652                 mutex_lock(&kvm->lock);
653                 if (kvm->created_vcpus) {
654                         r = -EBUSY;
655                 } else if (test_facility(64)) {
656                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
657                         set_kvm_facility(kvm->arch.model.fac_list, 64);
658                         r = 0;
659                 }
660                 mutex_unlock(&kvm->lock);
661                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
662                          r ? "(not available)" : "(success)");
663                 break;
664         case KVM_CAP_S390_AIS:
665                 mutex_lock(&kvm->lock);
666                 if (kvm->created_vcpus) {
667                         r = -EBUSY;
668                 } else {
669                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
670                         set_kvm_facility(kvm->arch.model.fac_list, 72);
671                         r = 0;
672                 }
673                 mutex_unlock(&kvm->lock);
674                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
675                          r ? "(not available)" : "(success)");
676                 break;
677         case KVM_CAP_S390_GS:
678                 r = -EINVAL;
679                 mutex_lock(&kvm->lock);
680                 if (kvm->created_vcpus) {
681                         r = -EBUSY;
682                 } else if (test_facility(133)) {
683                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
684                         set_kvm_facility(kvm->arch.model.fac_list, 133);
685                         r = 0;
686                 }
687                 mutex_unlock(&kvm->lock);
688                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
689                          r ? "(not available)" : "(success)");
690                 break;
691         case KVM_CAP_S390_HPAGE_1M:
692                 mutex_lock(&kvm->lock);
693                 if (kvm->created_vcpus)
694                         r = -EBUSY;
695                 else if (!hpage || kvm->arch.use_cmma)
696                         r = -EINVAL;
697                 else {
698                         r = 0;
699                         down_write(&kvm->mm->mmap_sem);
700                         kvm->mm->context.allow_gmap_hpage_1m = 1;
701                         up_write(&kvm->mm->mmap_sem);
702                         /*
703                          * We might have to create fake 4k page
704                          * tables. To prevent the hardware from working on
705                          * stale PGSTEs, we emulate these instructions.
706                          */
707                         kvm->arch.use_skf = 0;
708                         kvm->arch.use_pfmfi = 0;
709                 }
710                 mutex_unlock(&kvm->lock);
711                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
712                          r ? "(not available)" : "(success)");
713                 break;
714         case KVM_CAP_S390_USER_STSI:
715                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
716                 kvm->arch.user_stsi = 1;
717                 r = 0;
718                 break;
719         case KVM_CAP_S390_USER_INSTR0:
720                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
721                 kvm->arch.user_instr0 = 1;
722                 icpt_operexc_on_all_vcpus(kvm);
723                 r = 0;
724                 break;
725         default:
726                 r = -EINVAL;
727                 break;
728         }
729         return r;
730 }
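/*
 * Illustrative sketch of the userspace side (not part of this file): the VM
 * capabilities handled above are switched on with KVM_ENABLE_CAP on the
 * VM fd, e.g.:
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Caps that modify the CPU model (vector, RI, GS, AIS) must be enabled
 * before the first VCPU is created, otherwise -EBUSY is returned.
 */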
731
732 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
733 {
734         int ret;
735
736         switch (attr->attr) {
737         case KVM_S390_VM_MEM_LIMIT_SIZE:
738                 ret = 0;
739                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
740                          kvm->arch.mem_limit);
741                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
742                         ret = -EFAULT;
743                 break;
744         default:
745                 ret = -ENXIO;
746                 break;
747         }
748         return ret;
749 }
750
751 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
752 {
753         int ret;
754         unsigned int idx;
755         switch (attr->attr) {
756         case KVM_S390_VM_MEM_ENABLE_CMMA:
757                 ret = -ENXIO;
758                 if (!sclp.has_cmma)
759                         break;
760
761                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
762                 mutex_lock(&kvm->lock);
763                 if (kvm->created_vcpus)
764                         ret = -EBUSY;
765                 else if (kvm->mm->context.allow_gmap_hpage_1m)
766                         ret = -EINVAL;
767                 else {
768                         kvm->arch.use_cmma = 1;
769                         /* Not compatible with cmma. */
770                         kvm->arch.use_pfmfi = 0;
771                         ret = 0;
772                 }
773                 mutex_unlock(&kvm->lock);
774                 break;
775         case KVM_S390_VM_MEM_CLR_CMMA:
776                 ret = -ENXIO;
777                 if (!sclp.has_cmma)
778                         break;
779                 ret = -EINVAL;
780                 if (!kvm->arch.use_cmma)
781                         break;
782
783                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
784                 mutex_lock(&kvm->lock);
785                 idx = srcu_read_lock(&kvm->srcu);
786                 s390_reset_cmma(kvm->arch.gmap->mm);
787                 srcu_read_unlock(&kvm->srcu, idx);
788                 mutex_unlock(&kvm->lock);
789                 ret = 0;
790                 break;
791         case KVM_S390_VM_MEM_LIMIT_SIZE: {
792                 unsigned long new_limit;
793
794                 if (kvm_is_ucontrol(kvm))
795                         return -EINVAL;
796
797                 if (get_user(new_limit, (u64 __user *)attr->addr))
798                         return -EFAULT;
799
800                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
801                     new_limit > kvm->arch.mem_limit)
802                         return -E2BIG;
803
804                 if (!new_limit)
805                         return -EINVAL;
806
807                 /* gmap_create takes last usable address */
808                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
809                         new_limit -= 1;
810
811                 ret = -EBUSY;
812                 mutex_lock(&kvm->lock);
813                 if (!kvm->created_vcpus) {
814                         /* gmap_create will round the limit up */
815                         struct gmap *new = gmap_create(current->mm, new_limit);
816
817                         if (!new) {
818                                 ret = -ENOMEM;
819                         } else {
820                                 gmap_remove(kvm->arch.gmap);
821                                 new->private = kvm;
822                                 kvm->arch.gmap = new;
823                                 ret = 0;
824                         }
825                 }
826                 mutex_unlock(&kvm->lock);
827                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
828                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
829                          (void *) kvm->arch.gmap->asce);
830                 break;
831         }
832         default:
833                 ret = -ENXIO;
834                 break;
835         }
836         return ret;
837 }
838
839 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
840
841 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
842 {
843         struct kvm_vcpu *vcpu;
844         int i;
845
846         kvm_s390_vcpu_block_all(kvm);
847
848         kvm_for_each_vcpu(i, vcpu, kvm) {
849                 kvm_s390_vcpu_crypto_setup(vcpu);
850                 /* recreate the shadow crycb by leaving the VSIE handler */
851                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
852         }
853
854         kvm_s390_vcpu_unblock_all(kvm);
855 }
856
857 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
858 {
859         if (!test_kvm_facility(kvm, 76))
860                 return -EINVAL;
861
862         mutex_lock(&kvm->lock);
863         switch (attr->attr) {
864         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
865                 get_random_bytes(
866                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
867                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
868                 kvm->arch.crypto.aes_kw = 1;
869                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
870                 break;
871         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
872                 get_random_bytes(
873                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
874                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
875                 kvm->arch.crypto.dea_kw = 1;
876                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
877                 break;
878         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
879                 kvm->arch.crypto.aes_kw = 0;
880                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
881                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
882                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
883                 break;
884         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
885                 kvm->arch.crypto.dea_kw = 0;
886                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
887                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
888                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
889                 break;
890         default:
891                 mutex_unlock(&kvm->lock);
892                 return -ENXIO;
893         }
894
895         kvm_s390_vcpu_crypto_reset_all(kvm);
896         mutex_unlock(&kvm->lock);
897         return 0;
898 }
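/*
 * Illustrative only: userspace toggles the key wrapping controls above via
 * the VM device-attribute interface, e.g.:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * The subsequent kvm_s390_vcpu_crypto_reset_all() kicks all VCPUs out of
 * (v)SIE so that the crycb setup (and any shadow crycb used by vSIE) is
 * rebuilt with the new wrapping key masks.
 */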
899
900 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
901 {
902         int cx;
903         struct kvm_vcpu *vcpu;
904
905         kvm_for_each_vcpu(cx, vcpu, kvm)
906                 kvm_s390_sync_request(req, vcpu);
907 }
908
909 /*
910  * Must be called with kvm->srcu held to avoid races on memslots, and with
911  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
912  */
913 static int kvm_s390_vm_start_migration(struct kvm *kvm)
914 {
915         struct kvm_memory_slot *ms;
916         struct kvm_memslots *slots;
917         unsigned long ram_pages = 0;
918         int slotnr;
919
920         /* migration mode already enabled */
921         if (kvm->arch.migration_mode)
922                 return 0;
923         slots = kvm_memslots(kvm);
924         if (!slots || !slots->used_slots)
925                 return -EINVAL;
926
927         if (!kvm->arch.use_cmma) {
928                 kvm->arch.migration_mode = 1;
929                 return 0;
930         }
931         /* mark all the pages in active slots as dirty */
932         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
933                 ms = slots->memslots + slotnr;
934                 /*
935                  * The second half of the bitmap is only used on x86,
936                  * and would be wasted otherwise, so we put it to good
937                  * use here to keep track of the state of the storage
938                  * attributes.
939                  */
940                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
941                 ram_pages += ms->npages;
942         }
943         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
944         kvm->arch.migration_mode = 1;
945         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
946         return 0;
947 }
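/*
 * With CMMA in use, starting migration marks every page of the active slots
 * as dirty in the second bitmap half and primes cmma_dirty_pages with the
 * total page count, so the CMMA migration interface can report to userspace
 * how much work is left while the guest keeps running.
 */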
948
949 /*
950  * Must be called with kvm->slots_lock to avoid races with ourselves and
951  * kvm_s390_vm_start_migration.
952  */
953 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
954 {
955         /* migration mode already disabled */
956         if (!kvm->arch.migration_mode)
957                 return 0;
958         kvm->arch.migration_mode = 0;
959         if (kvm->arch.use_cmma)
960                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
961         return 0;
962 }
963
964 static int kvm_s390_vm_set_migration(struct kvm *kvm,
965                                      struct kvm_device_attr *attr)
966 {
967         int res = -ENXIO;
968
969         mutex_lock(&kvm->slots_lock);
970         switch (attr->attr) {
971         case KVM_S390_VM_MIGRATION_START:
972                 res = kvm_s390_vm_start_migration(kvm);
973                 break;
974         case KVM_S390_VM_MIGRATION_STOP:
975                 res = kvm_s390_vm_stop_migration(kvm);
976                 break;
977         default:
978                 break;
979         }
980         mutex_unlock(&kvm->slots_lock);
981
982         return res;
983 }
984
985 static int kvm_s390_vm_get_migration(struct kvm *kvm,
986                                      struct kvm_device_attr *attr)
987 {
988         u64 mig = kvm->arch.migration_mode;
989
990         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
991                 return -ENXIO;
992
993         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
994                 return -EFAULT;
995         return 0;
996 }
997
998 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
999 {
1000         struct kvm_s390_vm_tod_clock gtod;
1001
1002         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1003                 return -EFAULT;
1004
1005         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1006                 return -EINVAL;
1007         kvm_s390_set_tod_clock(kvm, &gtod);
1008
1009         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1010                 gtod.epoch_idx, gtod.tod);
1011
1012         return 0;
1013 }
1014
1015 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1016 {
1017         u8 gtod_high;
1018
1019         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1020                                            sizeof(gtod_high)))
1021                 return -EFAULT;
1022
1023         if (gtod_high != 0)
1024                 return -EINVAL;
1025         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1026
1027         return 0;
1028 }
1029
1030 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1031 {
1032         struct kvm_s390_vm_tod_clock gtod = { 0 };
1033
1034         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1035                            sizeof(gtod.tod)))
1036                 return -EFAULT;
1037
1038         kvm_s390_set_tod_clock(kvm, &gtod);
1039         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1040         return 0;
1041 }
1042
1043 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1044 {
1045         int ret;
1046
1047         if (attr->flags)
1048                 return -EINVAL;
1049
1050         switch (attr->attr) {
1051         case KVM_S390_VM_TOD_EXT:
1052                 ret = kvm_s390_set_tod_ext(kvm, attr);
1053                 break;
1054         case KVM_S390_VM_TOD_HIGH:
1055                 ret = kvm_s390_set_tod_high(kvm, attr);
1056                 break;
1057         case KVM_S390_VM_TOD_LOW:
1058                 ret = kvm_s390_set_tod_low(kvm, attr);
1059                 break;
1060         default:
1061                 ret = -ENXIO;
1062                 break;
1063         }
1064         return ret;
1065 }
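/*
 * Userspace sketch (illustrative only): the guest TOD is programmed via the
 * VM device-attribute interface, group KVM_S390_VM_TOD. With the
 * multiple-epoch facility (139) the KVM_S390_VM_TOD_EXT attribute carries
 * both the epoch index and the TOD base, e.g.:
 *
 *	struct kvm_s390_vm_tod_clock gtod = { .epoch_idx = 0, .tod = tod };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)(unsigned long)&gtod,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */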
1066
1067 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1068                                    struct kvm_s390_vm_tod_clock *gtod)
1069 {
1070         struct kvm_s390_tod_clock_ext htod;
1071
1072         preempt_disable();
1073
1074         get_tod_clock_ext((char *)&htod);
1075
1076         gtod->tod = htod.tod + kvm->arch.epoch;
1077         gtod->epoch_idx = 0;
1078         if (test_kvm_facility(kvm, 139)) {
1079                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1080                 if (gtod->tod < htod.tod)
1081                         gtod->epoch_idx += 1;
1082         }
1083
1084         preempt_enable();
1085 }
1086
1087 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1088 {
1089         struct kvm_s390_vm_tod_clock gtod;
1090
1091         memset(&gtod, 0, sizeof(gtod));
1092         kvm_s390_get_tod_clock(kvm, &gtod);
1093         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1094                 return -EFAULT;
1095
1096         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1097                 gtod.epoch_idx, gtod.tod);
1098         return 0;
1099 }
1100
1101 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103         u8 gtod_high = 0;
1104
1105         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1106                                          sizeof(gtod_high)))
1107                 return -EFAULT;
1108         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1109
1110         return 0;
1111 }
1112
1113 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1114 {
1115         u64 gtod;
1116
1117         gtod = kvm_s390_get_tod_clock_fast(kvm);
1118         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1119                 return -EFAULT;
1120         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1121
1122         return 0;
1123 }
1124
1125 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1126 {
1127         int ret;
1128
1129         if (attr->flags)
1130                 return -EINVAL;
1131
1132         switch (attr->attr) {
1133         case KVM_S390_VM_TOD_EXT:
1134                 ret = kvm_s390_get_tod_ext(kvm, attr);
1135                 break;
1136         case KVM_S390_VM_TOD_HIGH:
1137                 ret = kvm_s390_get_tod_high(kvm, attr);
1138                 break;
1139         case KVM_S390_VM_TOD_LOW:
1140                 ret = kvm_s390_get_tod_low(kvm, attr);
1141                 break;
1142         default:
1143                 ret = -ENXIO;
1144                 break;
1145         }
1146         return ret;
1147 }
1148
1149 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1150 {
1151         struct kvm_s390_vm_cpu_processor *proc;
1152         u16 lowest_ibc, unblocked_ibc;
1153         int ret = 0;
1154
1155         mutex_lock(&kvm->lock);
1156         if (kvm->created_vcpus) {
1157                 ret = -EBUSY;
1158                 goto out;
1159         }
1160         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1161         if (!proc) {
1162                 ret = -ENOMEM;
1163                 goto out;
1164         }
1165         if (!copy_from_user(proc, (void __user *)attr->addr,
1166                             sizeof(*proc))) {
1167                 kvm->arch.model.cpuid = proc->cpuid;
1168                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1169                 unblocked_ibc = sclp.ibc & 0xfff;
1170                 if (lowest_ibc && proc->ibc) {
1171                         if (proc->ibc > unblocked_ibc)
1172                                 kvm->arch.model.ibc = unblocked_ibc;
1173                         else if (proc->ibc < lowest_ibc)
1174                                 kvm->arch.model.ibc = lowest_ibc;
1175                         else
1176                                 kvm->arch.model.ibc = proc->ibc;
1177                 }
1178                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1179                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1180                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1181                          kvm->arch.model.ibc,
1182                          kvm->arch.model.cpuid);
1183                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1184                          kvm->arch.model.fac_list[0],
1185                          kvm->arch.model.fac_list[1],
1186                          kvm->arch.model.fac_list[2]);
1187         } else
1188                 ret = -EFAULT;
1189         kfree(proc);
1190 out:
1191         mutex_unlock(&kvm->lock);
1192         return ret;
1193 }
1194
1195 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1196                                        struct kvm_device_attr *attr)
1197 {
1198         struct kvm_s390_vm_cpu_feat data;
1199
1200         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1201                 return -EFAULT;
1202         if (!bitmap_subset((unsigned long *) data.feat,
1203                            kvm_s390_available_cpu_feat,
1204                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1205                 return -EINVAL;
1206
1207         mutex_lock(&kvm->lock);
1208         if (kvm->created_vcpus) {
1209                 mutex_unlock(&kvm->lock);
1210                 return -EBUSY;
1211         }
1212         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1213                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1214         mutex_unlock(&kvm->lock);
1215         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1216                          data.feat[0],
1217                          data.feat[1],
1218                          data.feat[2]);
1219         return 0;
1220 }
1221
1222 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1223                                           struct kvm_device_attr *attr)
1224 {
1225         /*
1226          * Once supported by kernel + hw, we have to store the subfunctions
1227          * in kvm->arch and remember that user space configured them.
1228          */
1229         return -ENXIO;
1230 }
1231
1232 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1233 {
1234         int ret = -ENXIO;
1235
1236         switch (attr->attr) {
1237         case KVM_S390_VM_CPU_PROCESSOR:
1238                 ret = kvm_s390_set_processor(kvm, attr);
1239                 break;
1240         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1241                 ret = kvm_s390_set_processor_feat(kvm, attr);
1242                 break;
1243         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1244                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1245                 break;
1246         }
1247         return ret;
1248 }
1249
1250 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1251 {
1252         struct kvm_s390_vm_cpu_processor *proc;
1253         int ret = 0;
1254
1255         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1256         if (!proc) {
1257                 ret = -ENOMEM;
1258                 goto out;
1259         }
1260         proc->cpuid = kvm->arch.model.cpuid;
1261         proc->ibc = kvm->arch.model.ibc;
1262         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1263                S390_ARCH_FAC_LIST_SIZE_BYTE);
1264         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1265                  kvm->arch.model.ibc,
1266                  kvm->arch.model.cpuid);
1267         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1268                  kvm->arch.model.fac_list[0],
1269                  kvm->arch.model.fac_list[1],
1270                  kvm->arch.model.fac_list[2]);
1271         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1272                 ret = -EFAULT;
1273         kfree(proc);
1274 out:
1275         return ret;
1276 }
1277
1278 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1279 {
1280         struct kvm_s390_vm_cpu_machine *mach;
1281         int ret = 0;
1282
1283         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1284         if (!mach) {
1285                 ret = -ENOMEM;
1286                 goto out;
1287         }
1288         get_cpu_id((struct cpuid *) &mach->cpuid);
1289         mach->ibc = sclp.ibc;
1290         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1291                S390_ARCH_FAC_LIST_SIZE_BYTE);
1292         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1293                sizeof(S390_lowcore.stfle_fac_list));
1294         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1295                  kvm->arch.model.ibc,
1296                  kvm->arch.model.cpuid);
1297         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1298                  mach->fac_mask[0],
1299                  mach->fac_mask[1],
1300                  mach->fac_mask[2]);
1301         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1302                  mach->fac_list[0],
1303                  mach->fac_list[1],
1304                  mach->fac_list[2]);
1305         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1306                 ret = -EFAULT;
1307         kfree(mach);
1308 out:
1309         return ret;
1310 }
1311
1312 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1313                                        struct kvm_device_attr *attr)
1314 {
1315         struct kvm_s390_vm_cpu_feat data;
1316
1317         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1318                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1319         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1320                 return -EFAULT;
1321         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1322                          data.feat[0],
1323                          data.feat[1],
1324                          data.feat[2]);
1325         return 0;
1326 }
1327
1328 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1329                                      struct kvm_device_attr *attr)
1330 {
1331         struct kvm_s390_vm_cpu_feat data;
1332
1333         bitmap_copy((unsigned long *) data.feat,
1334                     kvm_s390_available_cpu_feat,
1335                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1336         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1337                 return -EFAULT;
1338         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1339                          data.feat[0],
1340                          data.feat[1],
1341                          data.feat[2]);
1342         return 0;
1343 }
1344
1345 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1346                                           struct kvm_device_attr *attr)
1347 {
1348         /*
1349          * Once we can actually configure subfunctions (kernel + hw support),
1350          * we have to check if they were already set by user space, if so copy
1351          * we have to check whether they were already set by user space and,
1352          * if so, copy them from kvm->arch.
1353         return -ENXIO;
1354 }
1355
1356 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1357                                         struct kvm_device_attr *attr)
1358 {
1359         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1360             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1361                 return -EFAULT;
1362         return 0;
1363 }
1364 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1365 {
1366         int ret = -ENXIO;
1367
1368         switch (attr->attr) {
1369         case KVM_S390_VM_CPU_PROCESSOR:
1370                 ret = kvm_s390_get_processor(kvm, attr);
1371                 break;
1372         case KVM_S390_VM_CPU_MACHINE:
1373                 ret = kvm_s390_get_machine(kvm, attr);
1374                 break;
1375         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1376                 ret = kvm_s390_get_processor_feat(kvm, attr);
1377                 break;
1378         case KVM_S390_VM_CPU_MACHINE_FEAT:
1379                 ret = kvm_s390_get_machine_feat(kvm, attr);
1380                 break;
1381         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1382                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1383                 break;
1384         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1385                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1386                 break;
1387         }
1388         return ret;
1389 }
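/*
 * Typical flow on the userspace side: read KVM_S390_VM_CPU_MACHINE and
 * KVM_S390_VM_CPU_MACHINE_FEAT to learn what the host can offer, compute the
 * desired guest model, then write it back through KVM_S390_VM_CPU_PROCESSOR
 * and KVM_S390_VM_CPU_PROCESSOR_FEAT; this has to happen before the first
 * VCPU is created (the setters return -EBUSY afterwards).
 */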
1390
1391 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1392 {
1393         int ret;
1394
1395         switch (attr->group) {
1396         case KVM_S390_VM_MEM_CTRL:
1397                 ret = kvm_s390_set_mem_control(kvm, attr);
1398                 break;
1399         case KVM_S390_VM_TOD:
1400                 ret = kvm_s390_set_tod(kvm, attr);
1401                 break;
1402         case KVM_S390_VM_CPU_MODEL:
1403                 ret = kvm_s390_set_cpu_model(kvm, attr);
1404                 break;
1405         case KVM_S390_VM_CRYPTO:
1406                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1407                 break;
1408         case KVM_S390_VM_MIGRATION:
1409                 ret = kvm_s390_vm_set_migration(kvm, attr);
1410                 break;
1411         default:
1412                 ret = -ENXIO;
1413                 break;
1414         }
1415
1416         return ret;
1417 }
1418
1419 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1420 {
1421         int ret;
1422
1423         switch (attr->group) {
1424         case KVM_S390_VM_MEM_CTRL:
1425                 ret = kvm_s390_get_mem_control(kvm, attr);
1426                 break;
1427         case KVM_S390_VM_TOD:
1428                 ret = kvm_s390_get_tod(kvm, attr);
1429                 break;
1430         case KVM_S390_VM_CPU_MODEL:
1431                 ret = kvm_s390_get_cpu_model(kvm, attr);
1432                 break;
1433         case KVM_S390_VM_MIGRATION:
1434                 ret = kvm_s390_vm_get_migration(kvm, attr);
1435                 break;
1436         default:
1437                 ret = -ENXIO;
1438                 break;
1439         }
1440
1441         return ret;
1442 }
1443
1444 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1445 {
1446         int ret;
1447
1448         switch (attr->group) {
1449         case KVM_S390_VM_MEM_CTRL:
1450                 switch (attr->attr) {
1451                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1452                 case KVM_S390_VM_MEM_CLR_CMMA:
1453                         ret = sclp.has_cmma ? 0 : -ENXIO;
1454                         break;
1455                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1456                         ret = 0;
1457                         break;
1458                 default:
1459                         ret = -ENXIO;
1460                         break;
1461                 }
1462                 break;
1463         case KVM_S390_VM_TOD:
1464                 switch (attr->attr) {
1465                 case KVM_S390_VM_TOD_LOW:
1466                 case KVM_S390_VM_TOD_HIGH:
1467                         ret = 0;
1468                         break;
1469                 default:
1470                         ret = -ENXIO;
1471                         break;
1472                 }
1473                 break;
1474         case KVM_S390_VM_CPU_MODEL:
1475                 switch (attr->attr) {
1476                 case KVM_S390_VM_CPU_PROCESSOR:
1477                 case KVM_S390_VM_CPU_MACHINE:
1478                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1479                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1480                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1481                         ret = 0;
1482                         break;
1483                 /* configuring subfunctions is not supported yet */
1484                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1485                 default:
1486                         ret = -ENXIO;
1487                         break;
1488                 }
1489                 break;
1490         case KVM_S390_VM_CRYPTO:
1491                 switch (attr->attr) {
1492                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1493                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1494                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1495                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1496                         ret = 0;
1497                         break;
1498                 default:
1499                         ret = -ENXIO;
1500                         break;
1501                 }
1502                 break;
1503         case KVM_S390_VM_MIGRATION:
1504                 ret = 0;
1505                 break;
1506         default:
1507                 ret = -ENXIO;
1508                 break;
1509         }
1510
1511         return ret;
1512 }
1513
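/*
 * Read the guest storage keys for a range of guest frames into a buffer
 * supplied by user space. Returns KVM_S390_GET_SKEYS_NONE if the guest
 * does not use storage keys at all.
 */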
1514 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1515 {
1516         uint8_t *keys;
1517         uint64_t hva;
1518         int srcu_idx, i, r = 0;
1519
1520         if (args->flags != 0)
1521                 return -EINVAL;
1522
1523         /* Is this guest using storage keys? */
1524         if (!mm_uses_skeys(current->mm))
1525                 return KVM_S390_GET_SKEYS_NONE;
1526
1527         /* Enforce sane limit on memory allocation */
1528         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1529                 return -EINVAL;
1530
1531         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1532         if (!keys)
1533                 return -ENOMEM;
1534
1535         down_read(&current->mm->mmap_sem);
1536         srcu_idx = srcu_read_lock(&kvm->srcu);
1537         for (i = 0; i < args->count; i++) {
1538                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1539                 if (kvm_is_error_hva(hva)) {
1540                         r = -EFAULT;
1541                         break;
1542                 }
1543
1544                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1545                 if (r)
1546                         break;
1547         }
1548         srcu_read_unlock(&kvm->srcu, srcu_idx);
1549         up_read(&current->mm->mmap_sem);
1550
1551         if (!r) {
1552                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1553                                  sizeof(uint8_t) * args->count);
1554                 if (r)
1555                         r = -EFAULT;
1556         }
1557
1558         kvfree(keys);
1559         return r;
1560 }
1561
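/*
 * Set the guest storage keys for a range of guest frames from a buffer
 * supplied by user space. Storage key handling is enabled for the guest
 * first; pages that cannot be resolved are faulted in and retried.
 */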
1562 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1563 {
1564         uint8_t *keys;
1565         uint64_t hva;
1566         int srcu_idx, i, r = 0;
1567         bool unlocked;
1568
1569         if (args->flags != 0)
1570                 return -EINVAL;
1571
1572         /* Enforce sane limit on memory allocation */
1573         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1574                 return -EINVAL;
1575
1576         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1577         if (!keys)
1578                 return -ENOMEM;
1579
1580         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1581                            sizeof(uint8_t) * args->count);
1582         if (r) {
1583                 r = -EFAULT;
1584                 goto out;
1585         }
1586
1587         /* Enable storage key handling for the guest */
1588         r = s390_enable_skey();
1589         if (r)
1590                 goto out;
1591
1592         i = 0;
1593         down_read(&current->mm->mmap_sem);
1594         srcu_idx = srcu_read_lock(&kvm->srcu);
1595         while (i < args->count) {
1596                 unlocked = false;
1597                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1598                 if (kvm_is_error_hva(hva)) {
1599                         r = -EFAULT;
1600                         break;
1601                 }
1602
1603                 /* Lowest order bit is reserved */
1604                 if (keys[i] & 0x01) {
1605                         r = -EINVAL;
1606                         break;
1607                 }
1608
1609                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1610                 if (r) {
1611                         r = fixup_user_fault(current, current->mm, hva,
1612                                              FAULT_FLAG_WRITE, &unlocked);
1613                         if (r)
1614                                 break;
1615                 }
1616                 if (!r)
1617                         i++;
1618         }
1619         srcu_read_unlock(&kvm->srcu, srcu_idx);
1620         up_read(&current->mm->mmap_sem);
1621 out:
1622         kvfree(keys);
1623         return r;
1624 }
1625
1626 /*
1627  * Base address and length must be sent at the start of each block; therefore
1628  * it's cheaper to send some clean data, as long as it's less than the size of
1629  * two longs.
1630  */
1631 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1632 /* for consistency */
1633 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1634
1635 /*
1636  * Similar to gfn_to_memslot, but returns the index of a memslot even when the
1637  * address falls in a hole. In that case the index of one of the memslots
1638  * bordering the hole is returned.
1639  */
1640 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1641 {
1642         int start = 0, end = slots->used_slots;
1643         int slot = atomic_read(&slots->lru_slot);
1644         struct kvm_memory_slot *memslots = slots->memslots;
1645
1646         if (gfn >= memslots[slot].base_gfn &&
1647             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1648                 return slot;
1649
1650         while (start < end) {
1651                 slot = start + (end - start) / 2;
1652
1653                 if (gfn >= memslots[slot].base_gfn)
1654                         end = slot;
1655                 else
1656                         start = slot + 1;
1657         }
1658
1659         if (gfn >= memslots[start].base_gfn &&
1660             gfn < memslots[start].base_gfn + memslots[start].npages) {
1661                 atomic_set(&slots->lru_slot, start);
1662         }
1663
1664         return start;
1665 }
1666
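/*
 * Read the CMMA page-usage values for a contiguous range of guest frames
 * without consuming any dirty-page information.
 */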
1667 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1668                               u8 *res, unsigned long bufsize)
1669 {
1670         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1671
1672         args->count = 0;
1673         while (args->count < bufsize) {
1674                 hva = gfn_to_hva(kvm, cur_gfn);
1675                 /*
1676                  * We return an error if the first value was invalid, but we
1677                  * return successfully if at least one value was copied.
1678                  */
1679                 if (kvm_is_error_hva(hva))
1680                         return args->count ? 0 : -EFAULT;
1681                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1682                         pgstev = 0;
1683                 res[args->count++] = (pgstev >> 24) & 0x43;
1684                 cur_gfn++;
1685         }
1686
1687         return 0;
1688 }
1689
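/*
 * Find the guest frame number of the next page with its bit set in the
 * CMMA dirty bitmap. The search starts at cur_gfn and wraps around to the
 * lowest memslot if cur_gfn lies above all memslots; if no dirty page is
 * found, a value past the end of the highest memslot is returned.
 */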
1690 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1691                                               unsigned long cur_gfn)
1692 {
1693         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1694         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1695         unsigned long ofs = cur_gfn - ms->base_gfn;
1696
1697         if (ms->base_gfn + ms->npages <= cur_gfn) {
1698                 slotidx--;
1699                 /* If we are above the highest slot, wrap around */
1700                 if (slotidx < 0)
1701                         slotidx = slots->used_slots - 1;
1702
1703                 ms = slots->memslots + slotidx;
1704                 ofs = 0;
1705         }
1706         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1707         while ((slotidx > 0) && (ofs >= ms->npages)) {
1708                 slotidx--;
1709                 ms = slots->memslots + slotidx;
1710                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1711         }
1712         return ms->base_gfn + ofs;
1713 }
1714
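/*
 * Harvest dirty CMMA values: store the page-usage values of dirty pages in
 * the buffer, clearing the corresponding bits in the CMMA dirty bitmap and
 * decrementing the dirty-page counter along the way.
 */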
1715 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1716                              u8 *res, unsigned long bufsize)
1717 {
1718         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1719         struct kvm_memslots *slots = kvm_memslots(kvm);
1720         struct kvm_memory_slot *ms;
1721
1722         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1723         ms = gfn_to_memslot(kvm, cur_gfn);
1724         args->count = 0;
1725         args->start_gfn = cur_gfn;
1726         if (!ms)
1727                 return 0;
1728         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1729         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
1730
1731         while (args->count < bufsize) {
1732                 hva = gfn_to_hva(kvm, cur_gfn);
1733                 if (kvm_is_error_hva(hva))
1734                         return 0;
1735                 /* Decrement only if we actually flipped the bit to 0 */
1736                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
1737                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
1738                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1739                         pgstev = 0;
1740                 /* Save the value */
1741                 res[args->count++] = (pgstev >> 24) & 0x43;
1742                 /* If the next bit is too far away, stop. */
1743                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
1744                         return 0;
1745                 /* If we reached the previous "next", find the next one */
1746                 if (cur_gfn == next_gfn)
1747                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
1748                 /* Reached the end of memory or of the buffer, stop */
1749                 if ((next_gfn >= mem_end) ||
1750                     (next_gfn - args->start_gfn >= bufsize))
1751                         return 0;
1752                 cur_gfn++;
1753                 /* Reached the end of the current memslot, take the next one. */
1754                 if (cur_gfn - ms->base_gfn >= ms->npages) {
1755                         ms = gfn_to_memslot(kvm, cur_gfn);
1756                         if (!ms)
1757                                 return 0;
1758                 }
1759         }
1760         return 0;
1761 }
1762
1763 /*
1764  * This function searches for the next page with dirty CMMA attributes, and
1765  * saves the attributes in the buffer up to either the end of the buffer or
1766  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1767  * no trailing clean bytes are saved.
1768  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1769  * output buffer will indicate 0 as length.
1770  */
1771 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1772                                   struct kvm_s390_cmma_log *args)
1773 {
1774         unsigned long bufsize;
1775         int srcu_idx, peek, ret;
1776         u8 *values;
1777
1778         if (!kvm->arch.use_cmma)
1779                 return -ENXIO;
1780         /* Invalid/unsupported flags were specified */
1781         if (args->flags & ~KVM_S390_CMMA_PEEK)
1782                 return -EINVAL;
1783         /* Migration mode query, and we are not doing a migration */
1784         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1785         if (!peek && !kvm->arch.migration_mode)
1786                 return -EINVAL;
1787         /* CMMA is disabled or was not used, or the buffer has length zero */
1788         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1789         if (!bufsize || !kvm->mm->context.uses_cmm) {
1790                 memset(args, 0, sizeof(*args));
1791                 return 0;
1792         }
1793         /* We are not peeking, and there are no dirty pages */
1794         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
1795                 memset(args, 0, sizeof(*args));
1796                 return 0;
1797         }
1798
1799         values = vmalloc(bufsize);
1800         if (!values)
1801                 return -ENOMEM;
1802
1803         down_read(&kvm->mm->mmap_sem);
1804         srcu_idx = srcu_read_lock(&kvm->srcu);
1805         if (peek)
1806                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
1807         else
1808                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
1809         srcu_read_unlock(&kvm->srcu, srcu_idx);
1810         up_read(&kvm->mm->mmap_sem);
1811
1812         if (kvm->arch.migration_mode)
1813                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
1814         else
1815                 args->remaining = 0;
1816
1817         if (copy_to_user((void __user *)args->values, values, args->count))
1818                 ret = -EFAULT;
1819
1820         vfree(values);
1821         return ret;
1822 }
1823
1824 /*
1825  * This function sets the CMMA attributes for the given pages. If the input
1826  * buffer has zero length, no action is taken, otherwise the attributes are
1827  * set and the mm->context.uses_cmm flag is set.
1828  */
1829 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1830                                   const struct kvm_s390_cmma_log *args)
1831 {
1832         unsigned long hva, mask, pgstev, i;
1833         uint8_t *bits;
1834         int srcu_idx, r = 0;
1835
1836         mask = args->mask;
1837
1838         if (!kvm->arch.use_cmma)
1839                 return -ENXIO;
1840         /* invalid/unsupported flags */
1841         if (args->flags != 0)
1842                 return -EINVAL;
1843         /* Enforce sane limit on memory allocation */
1844         if (args->count > KVM_S390_CMMA_SIZE_MAX)
1845                 return -EINVAL;
1846         /* Nothing to do */
1847         if (args->count == 0)
1848                 return 0;
1849
1850         bits = vmalloc(array_size(sizeof(*bits), args->count));
1851         if (!bits)
1852                 return -ENOMEM;
1853
1854         r = copy_from_user(bits, (void __user *)args->values, args->count);
1855         if (r) {
1856                 r = -EFAULT;
1857                 goto out;
1858         }
1859
1860         down_read(&kvm->mm->mmap_sem);
1861         srcu_idx = srcu_read_lock(&kvm->srcu);
1862         for (i = 0; i < args->count; i++) {
1863                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1864                 if (kvm_is_error_hva(hva)) {
1865                         r = -EFAULT;
1866                         break;
1867                 }
1868
1869                 pgstev = bits[i];
1870                 pgstev = pgstev << 24;
1871                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1872                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
1873         }
1874         srcu_read_unlock(&kvm->srcu, srcu_idx);
1875         up_read(&kvm->mm->mmap_sem);
1876
1877         if (!kvm->mm->context.uses_cmm) {
1878                 down_write(&kvm->mm->mmap_sem);
1879                 kvm->mm->context.uses_cmm = 1;
1880                 up_write(&kvm->mm->mmap_sem);
1881         }
1882 out:
1883         vfree(bits);
1884         return r;
1885 }
1886
1887 long kvm_arch_vm_ioctl(struct file *filp,
1888                        unsigned int ioctl, unsigned long arg)
1889 {
1890         struct kvm *kvm = filp->private_data;
1891         void __user *argp = (void __user *)arg;
1892         struct kvm_device_attr attr;
1893         int r;
1894
1895         switch (ioctl) {
1896         case KVM_S390_INTERRUPT: {
1897                 struct kvm_s390_interrupt s390int;
1898
1899                 r = -EFAULT;
1900                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1901                         break;
1902                 r = kvm_s390_inject_vm(kvm, &s390int);
1903                 break;
1904         }
1905         case KVM_ENABLE_CAP: {
1906                 struct kvm_enable_cap cap;
1907                 r = -EFAULT;
1908                 if (copy_from_user(&cap, argp, sizeof(cap)))
1909                         break;
1910                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1911                 break;
1912         }
1913         case KVM_CREATE_IRQCHIP: {
1914                 struct kvm_irq_routing_entry routing;
1915
1916                 r = -EINVAL;
1917                 if (kvm->arch.use_irqchip) {
1918                         /* Set up dummy routing. */
1919                         memset(&routing, 0, sizeof(routing));
1920                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1921                 }
1922                 break;
1923         }
1924         case KVM_SET_DEVICE_ATTR: {
1925                 r = -EFAULT;
1926                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1927                         break;
1928                 r = kvm_s390_vm_set_attr(kvm, &attr);
1929                 break;
1930         }
1931         case KVM_GET_DEVICE_ATTR: {
1932                 r = -EFAULT;
1933                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1934                         break;
1935                 r = kvm_s390_vm_get_attr(kvm, &attr);
1936                 break;
1937         }
1938         case KVM_HAS_DEVICE_ATTR: {
1939                 r = -EFAULT;
1940                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1941                         break;
1942                 r = kvm_s390_vm_has_attr(kvm, &attr);
1943                 break;
1944         }
1945         case KVM_S390_GET_SKEYS: {
1946                 struct kvm_s390_skeys args;
1947
1948                 r = -EFAULT;
1949                 if (copy_from_user(&args, argp,
1950                                    sizeof(struct kvm_s390_skeys)))
1951                         break;
1952                 r = kvm_s390_get_skeys(kvm, &args);
1953                 break;
1954         }
1955         case KVM_S390_SET_SKEYS: {
1956                 struct kvm_s390_skeys args;
1957
1958                 r = -EFAULT;
1959                 if (copy_from_user(&args, argp,
1960                                    sizeof(struct kvm_s390_skeys)))
1961                         break;
1962                 r = kvm_s390_set_skeys(kvm, &args);
1963                 break;
1964         }
1965         case KVM_S390_GET_CMMA_BITS: {
1966                 struct kvm_s390_cmma_log args;
1967
1968                 r = -EFAULT;
1969                 if (copy_from_user(&args, argp, sizeof(args)))
1970                         break;
1971                 mutex_lock(&kvm->slots_lock);
1972                 r = kvm_s390_get_cmma_bits(kvm, &args);
1973                 mutex_unlock(&kvm->slots_lock);
1974                 if (!r) {
1975                         r = copy_to_user(argp, &args, sizeof(args));
1976                         if (r)
1977                                 r = -EFAULT;
1978                 }
1979                 break;
1980         }
1981         case KVM_S390_SET_CMMA_BITS: {
1982                 struct kvm_s390_cmma_log args;
1983
1984                 r = -EFAULT;
1985                 if (copy_from_user(&args, argp, sizeof(args)))
1986                         break;
1987                 mutex_lock(&kvm->slots_lock);
1988                 r = kvm_s390_set_cmma_bits(kvm, &args);
1989                 mutex_unlock(&kvm->slots_lock);
1990                 break;
1991         }
1992         default:
1993                 r = -ENOTTY;
1994         }
1995
1996         return r;
1997 }
1998
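/* Check whether the AP extended addressing (APXA) facility is installed. */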
1999 static int kvm_s390_apxa_installed(void)
2000 {
2001         struct ap_config_info info;
2002
2003         if (ap_instructions_available()) {
2004                 if (ap_qci(&info) == 0)
2005                         return info.apxa;
2006         }
2007
2008         return 0;
2009 }
2010
2011 /*
2012  * The format of the crypto control block (CRYCB) is specified in the 3 low
2013  * order bits of the CRYCB designation (CRYCBD) field as follows:
2014  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2015  *           AP extended addressing (APXA) facility is installed.
2016  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2017  * Format 2: Both the APXA and MSAX3 facilities are installed.
2018  */
2019 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2020 {
2021         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2022
2023         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2024         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2025
2026         /* Check whether MSAX3 is installed */
2027         if (!test_kvm_facility(kvm, 76))
2028                 return;
2029
2030         if (kvm_s390_apxa_installed())
2031                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2032         else
2033                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2034 }
2035
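/*
 * Clear the AP masks (APCB0/APCB1) in the guest's CRYCB and make every vCPU
 * recreate its shadow CRYCB, so that vSIE guests stop using the old masks.
 */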
2036 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2037 {
2038         mutex_lock(&kvm->lock);
2039         kvm_s390_vcpu_block_all(kvm);
2040
2041         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2042                sizeof(kvm->arch.crypto.crycb->apcb0));
2043         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2044                sizeof(kvm->arch.crypto.crycb->apcb1));
2045
2046         /* recreate the shadow crycb for each vcpu */
2047         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2048         kvm_s390_vcpu_unblock_all(kvm);
2049         mutex_unlock(&kvm->lock);
2050 }
2051 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2052
2053 static u64 kvm_s390_get_initial_cpuid(void)
2054 {
2055         struct cpuid cpuid;
2056
2057         get_cpu_id(&cpuid);
2058         cpuid.version = 0xff;
2059         return *((u64 *) &cpuid);
2060 }
2061
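/*
 * Set up the CRYCB for a new VM: select the CRYCB format and, if MSAX3 is
 * available, enable protected key support with freshly generated wrapping
 * key masks.
 */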
2062 static void kvm_s390_crypto_init(struct kvm *kvm)
2063 {
2064         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2065         kvm_s390_set_crycb_format(kvm);
2066
2067         if (!test_kvm_facility(kvm, 76))
2068                 return;
2069
2070         /* Enable AES/DEA protected key functions by default */
2071         kvm->arch.crypto.aes_kw = 1;
2072         kvm->arch.crypto.dea_kw = 1;
2073         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2074                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2075         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2076                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2077 }
2078
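/* Free the VM's SCA, be it a basic or an extended one. */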
2079 static void sca_dispose(struct kvm *kvm)
2080 {
2081         if (kvm->arch.use_esca)
2082                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2083         else
2084                 free_page((unsigned long)(kvm->arch.sca));
2085         kvm->arch.sca = NULL;
2086 }
2087
2088 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2089 {
2090         gfp_t alloc_flags = GFP_KERNEL;
2091         int i, rc;
2092         char debug_name[16];
2093         static unsigned long sca_offset;
2094
2095         rc = -EINVAL;
2096 #ifdef CONFIG_KVM_S390_UCONTROL
2097         if (type & ~KVM_VM_S390_UCONTROL)
2098                 goto out_err;
2099         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2100                 goto out_err;
2101 #else
2102         if (type)
2103                 goto out_err;
2104 #endif
2105
2106         rc = s390_enable_sie();
2107         if (rc)
2108                 goto out_err;
2109
2110         rc = -ENOMEM;
2111
2112         if (!sclp.has_64bscao)
2113                 alloc_flags |= GFP_DMA;
2114         rwlock_init(&kvm->arch.sca_lock);
2115         /* start with basic SCA */
2116         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2117         if (!kvm->arch.sca)
2118                 goto out_err;
2119         spin_lock(&kvm_lock);
2120         sca_offset += 16;
2121         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2122                 sca_offset = 0;
2123         kvm->arch.sca = (struct bsca_block *)
2124                         ((char *) kvm->arch.sca + sca_offset);
2125         spin_unlock(&kvm_lock);
2126
2127         sprintf(debug_name, "kvm-%u", current->pid);
2128
2129         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2130         if (!kvm->arch.dbf)
2131                 goto out_err;
2132
2133         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2134         kvm->arch.sie_page2 =
2135              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2136         if (!kvm->arch.sie_page2)
2137                 goto out_err;
2138
2139         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2140
2141         for (i = 0; i < kvm_s390_fac_size(); i++) {
2142                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2143                                               (kvm_s390_fac_base[i] |
2144                                                kvm_s390_fac_ext[i]);
2145                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2146                                               kvm_s390_fac_base[i];
2147         }
2148
2149         /* we are always in czam mode - even on pre z14 machines */
2150         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2151         set_kvm_facility(kvm->arch.model.fac_list, 138);
2152         /* we emulate STHYI in kvm */
2153         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2154         set_kvm_facility(kvm->arch.model.fac_list, 74);
2155         if (MACHINE_HAS_TLB_GUEST) {
2156                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2157                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2158         }
2159
2160         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2161         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2162
2163         kvm_s390_crypto_init(kvm);
2164
2165         mutex_init(&kvm->arch.float_int.ais_lock);
2166         spin_lock_init(&kvm->arch.float_int.lock);
2167         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2168                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2169         init_waitqueue_head(&kvm->arch.ipte_wq);
2170         mutex_init(&kvm->arch.ipte_mutex);
2171
2172         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2173         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2174
2175         if (type & KVM_VM_S390_UCONTROL) {
2176                 kvm->arch.gmap = NULL;
2177                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2178         } else {
2179                 if (sclp.hamax == U64_MAX)
2180                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2181                 else
2182                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2183                                                     sclp.hamax + 1);
2184                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2185                 if (!kvm->arch.gmap)
2186                         goto out_err;
2187                 kvm->arch.gmap->private = kvm;
2188                 kvm->arch.gmap->pfault_enabled = 0;
2189         }
2190
2191         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2192         kvm->arch.use_skf = sclp.has_skey;
2193         spin_lock_init(&kvm->arch.start_stop_lock);
2194         kvm_s390_vsie_init(kvm);
2195         kvm_s390_gisa_init(kvm);
2196         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2197
2198         return 0;
2199 out_err:
2200         free_page((unsigned long)kvm->arch.sie_page2);
2201         debug_unregister(kvm->arch.dbf);
2202         sca_dispose(kvm);
2203         KVM_EVENT(3, "creation of vm failed: %d", rc);
2204         return rc;
2205 }
2206
2207 bool kvm_arch_has_vcpu_debugfs(void)
2208 {
2209         return false;
2210 }
2211
2212 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2213 {
2214         return 0;
2215 }
2216
2217 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2218 {
2219         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2220         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2221         kvm_s390_clear_local_irqs(vcpu);
2222         kvm_clear_async_pf_completion_queue(vcpu);
2223         if (!kvm_is_ucontrol(vcpu->kvm))
2224                 sca_del_vcpu(vcpu);
2225
2226         if (kvm_is_ucontrol(vcpu->kvm))
2227                 gmap_remove(vcpu->arch.gmap);
2228
2229         if (vcpu->kvm->arch.use_cmma)
2230                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2231         free_page((unsigned long)(vcpu->arch.sie_block));
2232
2233         kvm_vcpu_uninit(vcpu);
2234         kmem_cache_free(kvm_vcpu_cache, vcpu);
2235 }
2236
2237 static void kvm_free_vcpus(struct kvm *kvm)
2238 {
2239         unsigned int i;
2240         struct kvm_vcpu *vcpu;
2241
2242         kvm_for_each_vcpu(i, vcpu, kvm)
2243                 kvm_arch_vcpu_destroy(vcpu);
2244
2245         mutex_lock(&kvm->lock);
2246         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2247                 kvm->vcpus[i] = NULL;
2248
2249         atomic_set(&kvm->online_vcpus, 0);
2250         mutex_unlock(&kvm->lock);
2251 }
2252
2253 void kvm_arch_destroy_vm(struct kvm *kvm)
2254 {
2255         kvm_free_vcpus(kvm);
2256         sca_dispose(kvm);
2257         debug_unregister(kvm->arch.dbf);
2258         kvm_s390_gisa_destroy(kvm);
2259         free_page((unsigned long)kvm->arch.sie_page2);
2260         if (!kvm_is_ucontrol(kvm))
2261                 gmap_remove(kvm->arch.gmap);
2262         kvm_s390_destroy_adapters(kvm);
2263         kvm_s390_clear_float_irqs(kvm);
2264         kvm_s390_vsie_destroy(kvm);
2265         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2266 }
2267
2268 /* Section: vcpu related */
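/* ucontrol VMs use a separate gmap per vCPU instead of a VM-wide one. */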
2269 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2270 {
2271         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2272         if (!vcpu->arch.gmap)
2273                 return -ENOMEM;
2274         vcpu->arch.gmap->private = vcpu->kvm;
2275
2276         return 0;
2277 }
2278
2279 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2280 {
2281         if (!kvm_s390_use_sca_entries())
2282                 return;
2283         read_lock(&vcpu->kvm->arch.sca_lock);
2284         if (vcpu->kvm->arch.use_esca) {
2285                 struct esca_block *sca = vcpu->kvm->arch.sca;
2286
2287                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2288                 sca->cpu[vcpu->vcpu_id].sda = 0;
2289         } else {
2290                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2291
2292                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2293                 sca->cpu[vcpu->vcpu_id].sda = 0;
2294         }
2295         read_unlock(&vcpu->kvm->arch.sca_lock);
2296 }
2297
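/* Register the vCPU in the SCA and set the SCA origin in its SIE block. */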
2298 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2299 {
2300         if (!kvm_s390_use_sca_entries()) {
2301                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2302
2303                 /* we still need the basic sca for the ipte control */
2304                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2305                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2306                 return;
2307         }
2308         read_lock(&vcpu->kvm->arch.sca_lock);
2309         if (vcpu->kvm->arch.use_esca) {
2310                 struct esca_block *sca = vcpu->kvm->arch.sca;
2311
2312                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2313                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2314                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2315                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2316                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2317         } else {
2318                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2319
2320                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2321                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2322                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2323                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2324         }
2325         read_unlock(&vcpu->kvm->arch.sca_lock);
2326 }
2327
2328 /* Basic SCA to Extended SCA data copy routines */
2329 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2330 {
2331         d->sda = s->sda;
2332         d->sigp_ctrl.c = s->sigp_ctrl.c;
2333         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2334 }
2335
2336 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2337 {
2338         int i;
2339
2340         d->ipte_control = s->ipte_control;
2341         d->mcn[0] = s->mcn;
2342         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2343                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2344 }
2345
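/*
 * Replace the basic SCA with an extended SCA and rewire all existing vCPUs
 * to it, so that vCPU ids beyond the basic SCA's slots can be used.
 */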
2346 static int sca_switch_to_extended(struct kvm *kvm)
2347 {
2348         struct bsca_block *old_sca = kvm->arch.sca;
2349         struct esca_block *new_sca;
2350         struct kvm_vcpu *vcpu;
2351         unsigned int vcpu_idx;
2352         u32 scaol, scaoh;
2353
2354         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2355         if (!new_sca)
2356                 return -ENOMEM;
2357
2358         scaoh = (u32)((u64)(new_sca) >> 32);
2359         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2360
2361         kvm_s390_vcpu_block_all(kvm);
2362         write_lock(&kvm->arch.sca_lock);
2363
2364         sca_copy_b_to_e(new_sca, old_sca);
2365
2366         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2367                 vcpu->arch.sie_block->scaoh = scaoh;
2368                 vcpu->arch.sie_block->scaol = scaol;
2369                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2370         }
2371         kvm->arch.sca = new_sca;
2372         kvm->arch.use_esca = 1;
2373
2374         write_unlock(&kvm->arch.sca_lock);
2375         kvm_s390_vcpu_unblock_all(kvm);
2376
2377         free_page((unsigned long)old_sca);
2378
2379         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2380                  old_sca, kvm->arch.sca);
2381         return 0;
2382 }
2383
2384 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2385 {
2386         int rc;
2387
2388         if (!kvm_s390_use_sca_entries()) {
2389                 if (id < KVM_MAX_VCPUS)
2390                         return true;
2391                 return false;
2392         }
2393         if (id < KVM_S390_BSCA_CPU_SLOTS)
2394                 return true;
2395         if (!sclp.has_esca || !sclp.has_64bscao)
2396                 return false;
2397
2398         mutex_lock(&kvm->lock);
2399         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2400         mutex_unlock(&kvm->lock);
2401
2402         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2403 }
2404
2405 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2406 {
2407         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2408         kvm_clear_async_pf_completion_queue(vcpu);
2409         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2410                                     KVM_SYNC_GPRS |
2411                                     KVM_SYNC_ACRS |
2412                                     KVM_SYNC_CRS |
2413                                     KVM_SYNC_ARCH0 |
2414                                     KVM_SYNC_PFAULT;
2415         kvm_s390_set_prefix(vcpu, 0);
2416         if (test_kvm_facility(vcpu->kvm, 64))
2417                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2418         if (test_kvm_facility(vcpu->kvm, 82))
2419                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2420         if (test_kvm_facility(vcpu->kvm, 133))
2421                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2422         if (test_kvm_facility(vcpu->kvm, 156))
2423                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2424         /* fprs can be synchronized via vrs, even if the guest has no vx. With
2425          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2426          */
2427         if (MACHINE_HAS_VX)
2428                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2429         else
2430                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2431
2432         if (kvm_is_ucontrol(vcpu->kvm))
2433                 return __kvm_ucontrol_vcpu_init(vcpu);
2434
2435         return 0;
2436 }
2437
2438 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2439 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2440 {
2441         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2442         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2443         vcpu->arch.cputm_start = get_tod_clock_fast();
2444         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2445 }
2446
2447 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2448 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2449 {
2450         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2451         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2452         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2453         vcpu->arch.cputm_start = 0;
2454         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2455 }
2456
2457 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2458 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2459 {
2460         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2461         vcpu->arch.cputm_enabled = true;
2462         __start_cpu_timer_accounting(vcpu);
2463 }
2464
2465 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2466 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2467 {
2468         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2469         __stop_cpu_timer_accounting(vcpu);
2470         vcpu->arch.cputm_enabled = false;
2471 }
2472
2473 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2474 {
2475         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2476         __enable_cpu_timer_accounting(vcpu);
2477         preempt_enable();
2478 }
2479
2480 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2481 {
2482         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2483         __disable_cpu_timer_accounting(vcpu);
2484         preempt_enable();
2485 }
2486
2487 /* set the cpu timer - may only be called from the VCPU thread itself */
2488 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2489 {
2490         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2491         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2492         if (vcpu->arch.cputm_enabled)
2493                 vcpu->arch.cputm_start = get_tod_clock_fast();
2494         vcpu->arch.sie_block->cputm = cputm;
2495         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2496         preempt_enable();
2497 }
2498
2499 /* update and get the cpu timer - can also be called from other VCPU threads */
2500 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2501 {
2502         unsigned int seq;
2503         __u64 value;
2504
2505         if (unlikely(!vcpu->arch.cputm_enabled))
2506                 return vcpu->arch.sie_block->cputm;
2507
2508         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2509         do {
2510                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2511                 /*
2512                  * If the writer would ever execute a read in the critical
2513                  * section, e.g. in irq context, we have a deadlock.
2514                  */
2515                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2516                 value = vcpu->arch.sie_block->cputm;
2517                 /* if cputm_start is 0, accounting is being started/stopped */
2518                 if (likely(vcpu->arch.cputm_start))
2519                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2520         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2521         preempt_enable();
2522         return value;
2523 }
2524
2525 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2526 {
2528         gmap_enable(vcpu->arch.enabled_gmap);
2529         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2530         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2531                 __start_cpu_timer_accounting(vcpu);
2532         vcpu->cpu = cpu;
2533 }
2534
2535 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2536 {
2537         vcpu->cpu = -1;
2538         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2539                 __stop_cpu_timer_accounting(vcpu);
2540         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2541         vcpu->arch.enabled_gmap = gmap_get_enabled();
2542         gmap_disable(vcpu->arch.enabled_gmap);
2544 }
2545
2546 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2547 {
2548         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2549         vcpu->arch.sie_block->gpsw.mask = 0UL;
2550         vcpu->arch.sie_block->gpsw.addr = 0UL;
2551         kvm_s390_set_prefix(vcpu, 0);
2552         kvm_s390_set_cpu_timer(vcpu, 0);
2553         vcpu->arch.sie_block->ckc       = 0UL;
2554         vcpu->arch.sie_block->todpr     = 0;
2555         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2556         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2557                                         CR0_INTERRUPT_KEY_SUBMASK |
2558                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2559         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2560                                         CR14_UNUSED_33 |
2561                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2562         /* make sure the new fpc will be lazily loaded */
2563         save_fpu_regs();
2564         current->thread.fpu.fpc = 0;
2565         vcpu->arch.sie_block->gbea = 1;
2566         vcpu->arch.sie_block->pp = 0;
2567         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2568         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2569         kvm_clear_async_pf_completion_queue(vcpu);
2570         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2571                 kvm_s390_vcpu_stop(vcpu);
2572         kvm_s390_clear_local_irqs(vcpu);
2573 }
2574
2575 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2576 {
2577         mutex_lock(&vcpu->kvm->lock);
2578         preempt_disable();
2579         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2580         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2581         preempt_enable();
2582         mutex_unlock(&vcpu->kvm->lock);
2583         if (!kvm_is_ucontrol(vcpu->kvm)) {
2584                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2585                 sca_add_vcpu(vcpu);
2586         }
2587         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2588                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2589         /* make vcpu_load load the right gmap on the first trigger */
2590         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2591 }
2592
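/*
 * Propagate the VM-wide crypto settings (CRYCB designation, AP interpretation
 * and protected key support) into the vCPU's SIE control block.
 */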
2593 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2594 {
2595         /*
2596          * If the AP instructions are not being interpreted and the MSAX3
2597          * facility is not configured for the guest, there is nothing to set up.
2598          */
2599         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2600                 return;
2601
2602         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2603         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2604
2605         if (vcpu->kvm->arch.crypto.apie)
2606                 vcpu->arch.sie_block->eca |= ECA_APIE;
2607
2608         /* Set up protected key support */
2609         if (vcpu->kvm->arch.crypto.aes_kw)
2610                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2611         if (vcpu->kvm->arch.crypto.dea_kw)
2612                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2613 }
2614
2615 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2616 {
2617         free_page(vcpu->arch.sie_block->cbrlo);
2618         vcpu->arch.sie_block->cbrlo = 0;
2619 }
2620
2621 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2622 {
2623         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2624         if (!vcpu->arch.sie_block->cbrlo)
2625                 return -ENOMEM;
2626         return 0;
2627 }
2628
2629 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2630 {
2631         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2632
2633         vcpu->arch.sie_block->ibc = model->ibc;
2634         if (test_kvm_facility(vcpu->kvm, 7))
2635                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2636 }
2637
2638 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2639 {
2640         int rc = 0;
2641
2642         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2643                                                     CPUSTAT_SM |
2644                                                     CPUSTAT_STOPPED);
2645
2646         if (test_kvm_facility(vcpu->kvm, 78))
2647                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2648         else if (test_kvm_facility(vcpu->kvm, 8))
2649                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2650
2651         kvm_s390_vcpu_setup_model(vcpu);
2652
2653         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2654         if (MACHINE_HAS_ESOP)
2655                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2656         if (test_kvm_facility(vcpu->kvm, 9))
2657                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2658         if (test_kvm_facility(vcpu->kvm, 73))
2659                 vcpu->arch.sie_block->ecb |= ECB_TE;
2660
2661         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2662                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2663         if (test_kvm_facility(vcpu->kvm, 130))
2664                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2665         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2666         if (sclp.has_cei)
2667                 vcpu->arch.sie_block->eca |= ECA_CEI;
2668         if (sclp.has_ib)
2669                 vcpu->arch.sie_block->eca |= ECA_IB;
2670         if (sclp.has_siif)
2671                 vcpu->arch.sie_block->eca |= ECA_SII;
2672         if (sclp.has_sigpif)
2673                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2674         if (test_kvm_facility(vcpu->kvm, 129)) {
2675                 vcpu->arch.sie_block->eca |= ECA_VX;
2676                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2677         }
2678         if (test_kvm_facility(vcpu->kvm, 139))
2679                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2680         if (test_kvm_facility(vcpu->kvm, 156))
2681                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2682         if (vcpu->arch.sie_block->gd) {
2683                 vcpu->arch.sie_block->eca |= ECA_AIV;
2684                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2685                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2686         }
2687         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2688                                         | SDNXC;
2689         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2690
2691         if (sclp.has_kss)
2692                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2693         else
2694                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2695
2696         if (vcpu->kvm->arch.use_cmma) {
2697                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2698                 if (rc)
2699                         return rc;
2700         }
2701         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2702         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2703
2704         kvm_s390_vcpu_crypto_setup(vcpu);
2705
2706         return rc;
2707 }
2708
2709 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2710                                       unsigned int id)
2711 {
2712         struct kvm_vcpu *vcpu;
2713         struct sie_page *sie_page;
2714         int rc = -EINVAL;
2715
2716         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2717                 goto out;
2718
2719         rc = -ENOMEM;
2720
2721         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2722         if (!vcpu)
2723                 goto out;
2724
2725         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2726         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2727         if (!sie_page)
2728                 goto out_free_cpu;
2729
2730         vcpu->arch.sie_block = &sie_page->sie_block;
2731         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2732
2733         /* the real guest size will always be smaller than msl */
2734         vcpu->arch.sie_block->mso = 0;
2735         vcpu->arch.sie_block->msl = sclp.hamax;
2736
2737         vcpu->arch.sie_block->icpua = id;
2738         spin_lock_init(&vcpu->arch.local_int.lock);
2739         vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
2740         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2741                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2742         seqcount_init(&vcpu->arch.cputm_seqcount);
2743
2744         rc = kvm_vcpu_init(vcpu, kvm, id);
2745         if (rc)
2746                 goto out_free_sie_block;
2747         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2748                  vcpu->arch.sie_block);
2749         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2750
2751         return vcpu;
2752 out_free_sie_block:
2753         free_page((unsigned long)(vcpu->arch.sie_block));
2754 out_free_cpu:
2755         kmem_cache_free(kvm_vcpu_cache, vcpu);
2756 out:
2757         return ERR_PTR(rc);
2758 }
2759
2760 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2761 {
2762         return kvm_s390_vcpu_has_irq(vcpu, 0);
2763 }
2764
2765 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2766 {
2767         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2768 }
2769
2770 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2771 {
2772         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2773         exit_sie(vcpu);
2774 }
2775
2776 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2777 {
2778         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2779 }
2780
2781 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2782 {
2783         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2784         exit_sie(vcpu);
2785 }
2786
2787 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
2788 {
2789         return atomic_read(&vcpu->arch.sie_block->prog20) &
2790                (PROG_BLOCK_SIE | PROG_REQUEST);
2791 }
2792
2793 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2794 {
2795         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2796 }
2797
2798 /*
2799  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
2800  * If the CPU is not running (e.g. waiting as idle) the function will
2801  * return immediately.
 */
2802 void exit_sie(struct kvm_vcpu *vcpu)
2803 {
2804         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
2805         kvm_s390_vsie_kick(vcpu);
2806         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2807                 cpu_relax();
2808 }
2809
2810 /* Kick a guest cpu out of SIE to process a request synchronously */
2811 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2812 {
2813         kvm_make_request(req, vcpu);
2814         kvm_s390_vcpu_request(vcpu);
2815 }
2816
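/*
 * gmap invalidation notifier: request an mmu reload on every vCPU whose
 * prefix pages intersect the invalidated range.
 */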
2817 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2818                               unsigned long end)
2819 {
2820         struct kvm *kvm = gmap->private;
2821         struct kvm_vcpu *vcpu;
2822         unsigned long prefix;
2823         int i;
2824
2825         if (gmap_is_shadow(gmap))
2826                 return;
2827         if (start >= 1UL << 31)
2828                 /* We are only interested in prefix pages */
2829                 return;
2830         kvm_for_each_vcpu(i, vcpu, kvm) {
2831                 /* match against both prefix pages */
2832                 prefix = kvm_s390_get_prefix(vcpu);
2833                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2834                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2835                                    start, end);
2836                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2837                 }
2838         }
2839 }
2840
2841 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2842 {
2843         /* kvm common code refers to this, but never calls it */
2844         BUG();
2845         return 0;
2846 }
2847
2848 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2849                                            struct kvm_one_reg *reg)
2850 {
2851         int r = -EINVAL;
2852
2853         switch (reg->id) {
2854         case KVM_REG_S390_TODPR:
2855                 r = put_user(vcpu->arch.sie_block->todpr,
2856                              (u32 __user *)reg->addr);
2857                 break;
2858         case KVM_REG_S390_EPOCHDIFF:
2859                 r = put_user(vcpu->arch.sie_block->epoch,
2860                              (u64 __user *)reg->addr);
2861                 break;
2862         case KVM_REG_S390_CPU_TIMER:
2863                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2864                              (u64 __user *)reg->addr);
2865                 break;
2866         case KVM_REG_S390_CLOCK_COMP:
2867                 r = put_user(vcpu->arch.sie_block->ckc,
2868                              (u64 __user *)reg->addr);
2869                 break;
2870         case KVM_REG_S390_PFTOKEN:
2871                 r = put_user(vcpu->arch.pfault_token,
2872                              (u64 __user *)reg->addr);
2873                 break;
2874         case KVM_REG_S390_PFCOMPARE:
2875                 r = put_user(vcpu->arch.pfault_compare,
2876                              (u64 __user *)reg->addr);
2877                 break;
2878         case KVM_REG_S390_PFSELECT:
2879                 r = put_user(vcpu->arch.pfault_select,
2880                              (u64 __user *)reg->addr);
2881                 break;
2882         case KVM_REG_S390_PP:
2883                 r = put_user(vcpu->arch.sie_block->pp,
2884                              (u64 __user *)reg->addr);
2885                 break;
2886         case KVM_REG_S390_GBEA:
2887                 r = put_user(vcpu->arch.sie_block->gbea,
2888                              (u64 __user *)reg->addr);
2889                 break;
2890         default:
2891                 break;
2892         }
2893
2894         return r;
2895 }
2896
2897 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2898                                            struct kvm_one_reg *reg)
2899 {
2900         int r = -EINVAL;
2901         __u64 val;
2902
2903         switch (reg->id) {
2904         case KVM_REG_S390_TODPR:
2905                 r = get_user(vcpu->arch.sie_block->todpr,
2906                              (u32 __user *)reg->addr);
2907                 break;
2908         case KVM_REG_S390_EPOCHDIFF:
2909                 r = get_user(vcpu->arch.sie_block->epoch,
2910                              (u64 __user *)reg->addr);
2911                 break;
2912         case KVM_REG_S390_CPU_TIMER:
2913                 r = get_user(val, (u64 __user *)reg->addr);
2914                 if (!r)
2915                         kvm_s390_set_cpu_timer(vcpu, val);
2916                 break;
2917         case KVM_REG_S390_CLOCK_COMP:
2918                 r = get_user(vcpu->arch.sie_block->ckc,
2919                              (u64 __user *)reg->addr);
2920                 break;
2921         case KVM_REG_S390_PFTOKEN:
2922                 r = get_user(vcpu->arch.pfault_token,
2923                              (u64 __user *)reg->addr);
2924                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2925                         kvm_clear_async_pf_completion_queue(vcpu);
2926                 break;
2927         case KVM_REG_S390_PFCOMPARE:
2928                 r = get_user(vcpu->arch.pfault_compare,
2929                              (u64 __user *)reg->addr);
2930                 break;
2931         case KVM_REG_S390_PFSELECT:
2932                 r = get_user(vcpu->arch.pfault_select,
2933                              (u64 __user *)reg->addr);
2934                 break;
2935         case KVM_REG_S390_PP:
2936                 r = get_user(vcpu->arch.sie_block->pp,
2937                              (u64 __user *)reg->addr);
2938                 break;
2939         case KVM_REG_S390_GBEA:
2940                 r = get_user(vcpu->arch.sie_block->gbea,
2941                              (u64 __user *)reg->addr);
2942                 break;
2943         default:
2944                 break;
2945         }
2946
2947         return r;
2948 }
2949
2950 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2951 {
2952         kvm_s390_vcpu_initial_reset(vcpu);
2953         return 0;
2954 }
2955
2956 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2957 {
2958         vcpu_load(vcpu);
2959         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2960         vcpu_put(vcpu);
2961         return 0;
2962 }
2963
2964 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2965 {
2966         vcpu_load(vcpu);
2967         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2968         vcpu_put(vcpu);
2969         return 0;
2970 }
2971
2972 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2973                                   struct kvm_sregs *sregs)
2974 {
2975         vcpu_load(vcpu);
2976
2977         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2978         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2979
2980         vcpu_put(vcpu);
2981         return 0;
2982 }
2983
2984 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2985                                   struct kvm_sregs *sregs)
2986 {
2987         vcpu_load(vcpu);
2988
2989         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2990         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2991
2992         vcpu_put(vcpu);
2993         return 0;
2994 }
2995
2996 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2997 {
2998         int ret = 0;
2999
3000         vcpu_load(vcpu);
3001
3002         if (test_fp_ctl(fpu->fpc)) {
3003                 ret = -EINVAL;
3004                 goto out;
3005         }
3006         vcpu->run->s.regs.fpc = fpu->fpc;
3007         if (MACHINE_HAS_VX)
3008                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3009                                  (freg_t *) fpu->fprs);
3010         else
3011                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3012
3013 out:
3014         vcpu_put(vcpu);
3015         return ret;
3016 }
3017
3018 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3019 {
3020         vcpu_load(vcpu);
3021
3022         /* make sure we have the latest values */
3023         save_fpu_regs();
3024         if (MACHINE_HAS_VX)
3025                 convert_vx_to_fp((freg_t *) fpu->fprs,
3026                                  (__vector128 *) vcpu->run->s.regs.vrs);
3027         else
3028                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3029         fpu->fpc = vcpu->run->s.regs.fpc;
3030
3031         vcpu_put(vcpu);
3032         return 0;
3033 }
3034
3035 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3036 {
3037         int rc = 0;
3038
3039         if (!is_vcpu_stopped(vcpu))
3040                 rc = -EBUSY;
3041         else {
3042                 vcpu->run->psw_mask = psw.mask;
3043                 vcpu->run->psw_addr = psw.addr;
3044         }
3045         return rc;
3046 }
3047
3048 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3049                                   struct kvm_translation *tr)
3050 {
3051         return -EINVAL; /* not implemented yet */
3052 }
3053
3054 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3055                               KVM_GUESTDBG_USE_HW_BP | \
3056                               KVM_GUESTDBG_ENABLE)
3057
3058 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3059                                         struct kvm_guest_debug *dbg)
3060 {
3061         int rc = 0;
3062
3063         vcpu_load(vcpu);
3064
3065         vcpu->guest_debug = 0;
3066         kvm_s390_clear_bp_data(vcpu);
3067
3068         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3069                 rc = -EINVAL;
3070                 goto out;
3071         }
3072         if (!sclp.has_gpere) {
3073                 rc = -EINVAL;
3074                 goto out;
3075         }
3076
3077         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3078                 vcpu->guest_debug = dbg->control;
3079                 /* enforce guest PER */
3080                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3081
3082                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3083                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3084         } else {
3085                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3086                 vcpu->arch.guestdbg.last_bp = 0;
3087         }
3088
3089         if (rc) {
3090                 vcpu->guest_debug = 0;
3091                 kvm_s390_clear_bp_data(vcpu);
3092                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3093         }
3094
3095 out:
3096         vcpu_put(vcpu);
3097         return rc;
3098 }
3099
3100 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3101                                     struct kvm_mp_state *mp_state)
3102 {
3103         int ret;
3104
3105         vcpu_load(vcpu);
3106
3107         /* CHECK_STOP and LOAD are not supported yet */
3108         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3109                                       KVM_MP_STATE_OPERATING;
3110
3111         vcpu_put(vcpu);
3112         return ret;
3113 }
3114
3115 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3116                                     struct kvm_mp_state *mp_state)
3117 {
3118         int rc = 0;
3119
3120         vcpu_load(vcpu);
3121
3122         /* user space knows about this interface - let it control the state */
3123         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3124
3125         switch (mp_state->mp_state) {
3126         case KVM_MP_STATE_STOPPED:
3127                 kvm_s390_vcpu_stop(vcpu);
3128                 break;
3129         case KVM_MP_STATE_OPERATING:
3130                 kvm_s390_vcpu_start(vcpu);
3131                 break;
3132         case KVM_MP_STATE_LOAD:
3133         case KVM_MP_STATE_CHECK_STOP:
3134                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3135         default:
3136                 rc = -ENXIO;
3137         }
3138
3139         vcpu_put(vcpu);
3140         return rc;
3141 }
3142
3143 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3144 {
3145         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3146 }
3147
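/*
 * Process all requests that were raised via kvm_make_request() or
 * kvm_s390_sync_request() before (re)entering SIE.
 * Returns 0 on success, a negative error code otherwise.
 */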
3148 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3149 {
3150 retry:
3151         kvm_s390_vcpu_request_handled(vcpu);
3152         if (!kvm_request_pending(vcpu))
3153                 return 0;
3154         /*
3155          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3156          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3157          * This ensures that the ipte instruction for this request has
3158          * already finished. We might race against a second unmapper that
3159          * wants to set the blocking bit. Let's just retry the request loop.
3160          */
3161         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3162                 int rc;
3163                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3164                                           kvm_s390_get_prefix(vcpu),
3165                                           PAGE_SIZE * 2, PROT_WRITE);
3166                 if (rc) {
3167                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3168                         return rc;
3169                 }
3170                 goto retry;
3171         }
3172
3173         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3174                 vcpu->arch.sie_block->ihcpu = 0xffff;
3175                 goto retry;
3176         }
3177
3178         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3179                 if (!ibs_enabled(vcpu)) {
3180                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3181                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3182                 }
3183                 goto retry;
3184         }
3185
3186         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3187                 if (ibs_enabled(vcpu)) {
3188                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3189                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3190                 }
3191                 goto retry;
3192         }
3193
3194         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3195                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3196                 goto retry;
3197         }
3198
3199         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3200                 /*
3201                  * Disable CMM virtualization; we will emulate the ESSA
3202                  * instruction manually, in order to provide additional
3203                  * functionalities needed for live migration.
3204                  */
3205                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3206                 goto retry;
3207         }
3208
3209         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3210                 /*
3211                  * Re-enable CMM virtualization if CMMA is available and
3212                  * CMM has been used.
3213                  */
3214                 if ((vcpu->kvm->arch.use_cmma) &&
3215                     (vcpu->kvm->mm->context.uses_cmm))
3216                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3217                 goto retry;
3218         }
3219
3220         /* nothing to do, just clear the request */
3221         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3222         /* we left the vsie handler, nothing to do, just clear the request */
3223         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3224
3225         return 0;
3226 }
3227
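/*
 * Set the guest TOD clock. The epoch (and, with the multiple-epoch facility,
 * the epoch index) is stored as the difference between the requested guest
 * TOD and the current host TOD and is copied into every VCPU's SIE block
 * while all VCPUs are blocked out of SIE.
 */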
3228 void kvm_s390_set_tod_clock(struct kvm *kvm,
3229                             const struct kvm_s390_vm_tod_clock *gtod)
3230 {
3231         struct kvm_vcpu *vcpu;
3232         struct kvm_s390_tod_clock_ext htod;
3233         int i;
3234
3235         mutex_lock(&kvm->lock);
3236         preempt_disable();
3237
3238         get_tod_clock_ext((char *)&htod);
3239
3240         kvm->arch.epoch = gtod->tod - htod.tod;
3241         kvm->arch.epdx = 0;
3242         if (test_kvm_facility(kvm, 139)) {
3243                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3244                 if (kvm->arch.epoch > gtod->tod)
3245                         kvm->arch.epdx -= 1;
3246         }
3247
3248         kvm_s390_vcpu_block_all(kvm);
3249         kvm_for_each_vcpu(i, vcpu, kvm) {
3250                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3251                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3252         }
3253
3254         kvm_s390_vcpu_unblock_all(kvm);
3255         preempt_enable();
3256         mutex_unlock(&kvm->lock);
3257 }
3258
3259 /**
3260  * kvm_arch_fault_in_page - fault-in guest page if necessary
3261  * @vcpu: The corresponding virtual cpu
3262  * @gpa: Guest physical address
3263  * @writable: Whether the page should be writable or not
3264  *
3265  * Make sure that a guest page has been faulted-in on the host.
3266  *
3267  * Return: Zero on success, negative error code otherwise.
3268  */
3269 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3270 {
3271         return gmap_fault(vcpu->arch.gmap, gpa,
3272                           writable ? FAULT_FLAG_WRITE : 0);
3273 }
3274
3275 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3276                                       unsigned long token)
3277 {
3278         struct kvm_s390_interrupt inti;
3279         struct kvm_s390_irq irq;
3280
3281         if (start_token) {
3282                 irq.u.ext.ext_params2 = token;
3283                 irq.type = KVM_S390_INT_PFAULT_INIT;
3284                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3285         } else {
3286                 inti.type = KVM_S390_INT_PFAULT_DONE;
3287                 inti.parm64 = token;
3288                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3289         }
3290 }
3291
3292 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3293                                      struct kvm_async_pf *work)
3294 {
3295         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3296         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3297 }
3298
3299 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3300                                  struct kvm_async_pf *work)
3301 {
3302         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3303         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3304 }
3305
3306 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3307                                struct kvm_async_pf *work)
3308 {
3309         /* s390 will always inject the page directly */
3310 }
3311
3312 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3313 {
3314         /*
3315          * s390 will always inject the page directly,
3316          * but we still want check_async_completion to clean up
3317          */
3318         return true;
3319 }
3320
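/*
 * Try to turn a host fault on guest memory into an asynchronous pfault
 * notification. This is only possible if the guest has a valid pfault token,
 * the PSW matches the pfault select/compare mask, external interrupts and the
 * service-signal subclass are enabled, no interrupt is already pending and
 * pfault handling is enabled on the gmap. Returns non-zero when async
 * handling was set up, zero when the caller should fall back to a
 * synchronous fault-in.
 */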
3321 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3322 {
3323         hva_t hva;
3324         struct kvm_arch_async_pf arch;
3325         int rc;
3326
3327         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3328                 return 0;
3329         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3330             vcpu->arch.pfault_compare)
3331                 return 0;
3332         if (psw_extint_disabled(vcpu))
3333                 return 0;
3334         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3335                 return 0;
3336         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3337                 return 0;
3338         if (!vcpu->arch.gmap->pfault_enabled)
3339                 return 0;
3340
3341         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3342         hva += current->thread.gmap_addr & ~PAGE_MASK;
3343         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3344                 return 0;
3345
3346         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3347         return rc;
3348 }
3349
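/*
 * Per round-trip work before (re)entering SIE: async pfault housekeeping,
 * handling of pending host machine checks and rescheduling, delivery of
 * pending interrupts, processing of pending requests and guest PER
 * patching when debugging is enabled.
 */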
3350 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3351 {
3352         int rc, cpuflags;
3353
3354         /*
3355          * On s390, notifications for arriving pages are delivered directly
3356          * to the guest, but the housekeeping for completed pfaults is
3357          * handled outside the worker.
3358          */
3359         kvm_check_async_pf_completion(vcpu);
3360
3361         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3362         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3363
3364         if (need_resched())
3365                 schedule();
3366
3367         if (test_cpu_flag(CIF_MCCK_PENDING))
3368                 s390_handle_mcck();
3369
3370         if (!kvm_is_ucontrol(vcpu->kvm)) {
3371                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3372                 if (rc)
3373                         return rc;
3374         }
3375
3376         rc = kvm_s390_handle_requests(vcpu);
3377         if (rc)
3378                 return rc;
3379
3380         if (guestdbg_enabled(vcpu)) {
3381                 kvm_s390_backup_guest_per_regs(vcpu);
3382                 kvm_s390_patch_guest_per_regs(vcpu);
3383         }
3384
3385         vcpu->arch.sie_block->icptcode = 0;
3386         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3387         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3388         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3389
3390         return 0;
3391 }
3392
3393 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3394 {
3395         struct kvm_s390_pgm_info pgm_info = {
3396                 .code = PGM_ADDRESSING,
3397         };
3398         u8 opcode, ilen;
3399         int rc;
3400
3401         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3402         trace_kvm_s390_sie_fault(vcpu);
3403
3404         /*
3405          * We want to inject an addressing exception, which is defined as a
3406          * suppressing or terminating exception. However, since we came here
3407          * by a DAT access exception, the PSW still points to the faulting
3408          * instruction since DAT exceptions are nullifying. So we've got
3409          * to look up the current opcode to get the length of the instruction
3410          * to be able to forward the PSW.
3411          */
3412         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3413         ilen = insn_length(opcode);
3414         if (rc < 0) {
3415                 return rc;
3416         } else if (rc) {
3417                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3418                  * Forward by arbitrary ilc, injection will take care of
3419                  * nullification if necessary.
3420                  */
3421                 pgm_info = vcpu->arch.pgm;
3422                 ilen = 4;
3423         }
3424         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3425         kvm_s390_forward_psw(vcpu, ilen);
3426         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3427 }
3428
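/*
 * Per round-trip work after leaving SIE: forward a machine check that caused
 * the SIE exit to the guest, hand intercepts to the intercept handlers and
 * convert host faults on guest memory into async pfaults or synchronous
 * fault-ins. Returns 0 to continue the run loop, -EREMOTE when userspace
 * interaction is required, or another negative error code.
 */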
3429 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3430 {
3431         struct mcck_volatile_info *mcck_info;
3432         struct sie_page *sie_page;
3433
3434         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3435                    vcpu->arch.sie_block->icptcode);
3436         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3437
3438         if (guestdbg_enabled(vcpu))
3439                 kvm_s390_restore_guest_per_regs(vcpu);
3440
3441         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3442         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3443
3444         if (exit_reason == -EINTR) {
3445                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3446                 sie_page = container_of(vcpu->arch.sie_block,
3447                                         struct sie_page, sie_block);
3448                 mcck_info = &sie_page->mcck_info;
3449                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3450                 return 0;
3451         }
3452
3453         if (vcpu->arch.sie_block->icptcode > 0) {
3454                 int rc = kvm_handle_sie_intercept(vcpu);
3455
3456                 if (rc != -EOPNOTSUPP)
3457                         return rc;
3458                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3459                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3460                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3461                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3462                 return -EREMOTE;
3463         } else if (exit_reason != -EFAULT) {
3464                 vcpu->stat.exit_null++;
3465                 return 0;
3466         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3467                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3468                 vcpu->run->s390_ucontrol.trans_exc_code =
3469                                                 current->thread.gmap_addr;
3470                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3471                 return -EREMOTE;
3472         } else if (current->thread.gmap_pfault) {
3473                 trace_kvm_s390_major_guest_pfault(vcpu);
3474                 current->thread.gmap_pfault = 0;
3475                 if (kvm_arch_setup_async_pf(vcpu))
3476                         return 0;
3477                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3478         }
3479         return vcpu_post_run_fault_in_sie(vcpu);
3480 }
3481
3482 static int __vcpu_run(struct kvm_vcpu *vcpu)
3483 {
3484         int rc, exit_reason;
3485
3486         /*
3487          * We try to hold kvm->srcu during most of vcpu_run (except when
3488          * running the guest), so that memslots (and other stuff) are protected
3489          */
3490         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3491
3492         do {
3493                 rc = vcpu_pre_run(vcpu);
3494                 if (rc)
3495                         break;
3496
3497                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3498                 /*
3499                  * As PF_VCPU will be used in the fault handler, there must
3500                  * be no uaccess between guest_enter and guest_exit.
3501                  */
3502                 local_irq_disable();
3503                 guest_enter_irqoff();
3504                 __disable_cpu_timer_accounting(vcpu);
3505                 local_irq_enable();
3506                 exit_reason = sie64a(vcpu->arch.sie_block,
3507                                      vcpu->run->s.regs.gprs);
3508                 local_irq_disable();
3509                 __enable_cpu_timer_accounting(vcpu);
3510                 guest_exit_irqoff();
3511                 local_irq_enable();
3512                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3513
3514                 rc = vcpu_post_run(vcpu, exit_reason);
3515         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3516
3517         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3518         return rc;
3519 }
3520
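/*
 * Copy the register state that userspace marked dirty in kvm_run into the
 * SIE block and the vcpu, and switch the lazily managed register sets
 * (access, floating point/vector and guarded storage registers) from host
 * to guest values. store_regs() is the counterpart on the way out.
 */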
3521 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3522 {
3523         struct runtime_instr_cb *riccb;
3524         struct gs_cb *gscb;
3525
3526         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3527         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3528         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3529         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3530         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3531                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3532         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3533                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3534                 /* some control register changes require a tlb flush */
3535                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3536         }
3537         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3538                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3539                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3540                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3541                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3542                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3543         }
3544         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3545                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3546                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3547                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3548                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3549                         kvm_clear_async_pf_completion_queue(vcpu);
3550         }
3551         /*
3552          * If userspace sets the riccb (e.g. after migration) to a valid state,
3553          * we should enable RI here instead of doing the lazy enablement.
3554          */
3555         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3556             test_kvm_facility(vcpu->kvm, 64) &&
3557             riccb->v &&
3558             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3559                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3560                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3561         }
3562         /*
3563          * If userspace sets the gscb (e.g. after migration) to non-zero,
3564          * we should enable GS here instead of doing the lazy enablement.
3565          */
3566         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3567             test_kvm_facility(vcpu->kvm, 133) &&
3568             gscb->gssm &&
3569             !vcpu->arch.gs_enabled) {
3570                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3571                 vcpu->arch.sie_block->ecb |= ECB_GS;
3572                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3573                 vcpu->arch.gs_enabled = 1;
3574         }
3575         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3576             test_kvm_facility(vcpu->kvm, 82)) {
3577                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3578                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3579         }
3580         save_access_regs(vcpu->arch.host_acrs);
3581         restore_access_regs(vcpu->run->s.regs.acrs);
3582         /* save host (userspace) fprs/vrs */
3583         save_fpu_regs();
3584         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3585         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3586         if (MACHINE_HAS_VX)
3587                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3588         else
3589                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3590         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3591         if (test_fp_ctl(current->thread.fpu.fpc))
3592                 /* User space provided an invalid FPC, let's clear it */
3593                 current->thread.fpu.fpc = 0;
3594         if (MACHINE_HAS_GS) {
3595                 preempt_disable();
3596                 __ctl_set_bit(2, 4);
3597                 if (current->thread.gs_cb) {
3598                         vcpu->arch.host_gscb = current->thread.gs_cb;
3599                         save_gs_cb(vcpu->arch.host_gscb);
3600                 }
3601                 if (vcpu->arch.gs_enabled) {
3602                         current->thread.gs_cb = (struct gs_cb *)
3603                                                 &vcpu->run->s.regs.gscb;
3604                         restore_gs_cb(current->thread.gs_cb);
3605                 }
3606                 preempt_enable();
3607         }
3608         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3609
3610         kvm_run->kvm_dirty_regs = 0;
3611 }
3612
3613 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3614 {
3615         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3616         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3617         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3618         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3619         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3620         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3621         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3622         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3623         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3624         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3625         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3626         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3627         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3628         save_access_regs(vcpu->run->s.regs.acrs);
3629         restore_access_regs(vcpu->arch.host_acrs);
3630         /* Save guest register state */
3631         save_fpu_regs();
3632         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3633         /* Restore will be done lazily at return */
3634         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3635         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3636         if (MACHINE_HAS_GS) {
3637                 __ctl_set_bit(2, 4);
3638                 if (vcpu->arch.gs_enabled)
3639                         save_gs_cb(current->thread.gs_cb);
3640                 preempt_disable();
3641                 current->thread.gs_cb = vcpu->arch.host_gscb;
3642                 restore_gs_cb(vcpu->arch.host_gscb);
3643                 preempt_enable();
3644                 if (!vcpu->arch.host_gscb)
3645                         __ctl_clear_bit(2, 4);
3646                 vcpu->arch.host_gscb = NULL;
3647         }
3648         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3649 }
3650
3651 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3652 {
3653         int rc;
3654
3655         if (kvm_run->immediate_exit)
3656                 return -EINTR;
3657
3658         vcpu_load(vcpu);
3659
3660         if (guestdbg_exit_pending(vcpu)) {
3661                 kvm_s390_prepare_debug_exit(vcpu);
3662                 rc = 0;
3663                 goto out;
3664         }
3665
3666         kvm_sigset_activate(vcpu);
3667
3668         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3669                 kvm_s390_vcpu_start(vcpu);
3670         } else if (is_vcpu_stopped(vcpu)) {
3671                 pr_err_ratelimited("can't run stopped vcpu %d\n",
3672                                    vcpu->vcpu_id);
3673                 rc = -EINVAL;
3674                 goto out;
3675         }
3676
3677         sync_regs(vcpu, kvm_run);
3678         enable_cpu_timer_accounting(vcpu);
3679
3680         might_fault();
3681         rc = __vcpu_run(vcpu);
3682
3683         if (signal_pending(current) && !rc) {
3684                 kvm_run->exit_reason = KVM_EXIT_INTR;
3685                 rc = -EINTR;
3686         }
3687
3688         if (guestdbg_exit_pending(vcpu) && !rc)  {
3689                 kvm_s390_prepare_debug_exit(vcpu);
3690                 rc = 0;
3691         }
3692
3693         if (rc == -EREMOTE) {
3694                 /* userspace support is needed, kvm_run has been prepared */
3695                 rc = 0;
3696         }
3697
3698         disable_cpu_timer_accounting(vcpu);
3699         store_regs(vcpu, kvm_run);
3700
3701         kvm_sigset_deactivate(vcpu);
3702
3703         vcpu->stat.exit_userspace++;
3704 out:
3705         vcpu_put(vcpu);
3706         return rc;
3707 }
3708
3709 /*
3710  * store status at address
3711  * we have two special cases:
3712  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3713  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3714  */
3715 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3716 {
3717         unsigned char archmode = 1;
3718         freg_t fprs[NUM_FPRS];
3719         unsigned int px;
3720         u64 clkcomp, cputm;
3721         int rc;
3722
3723         px = kvm_s390_get_prefix(vcpu);
3724         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3725                 if (write_guest_abs(vcpu, 163, &archmode, 1))
3726                         return -EFAULT;
3727                 gpa = 0;
3728         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3729                 if (write_guest_real(vcpu, 163, &archmode, 1))
3730                         return -EFAULT;
3731                 gpa = px;
3732         } else
3733                 gpa -= __LC_FPREGS_SAVE_AREA;
3734
3735         /* manually convert vector registers if necessary */
3736         if (MACHINE_HAS_VX) {
3737                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3738                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3739                                      fprs, 128);
3740         } else {
3741                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3742                                      vcpu->run->s.regs.fprs, 128);
3743         }
3744         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3745                               vcpu->run->s.regs.gprs, 128);
3746         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3747                               &vcpu->arch.sie_block->gpsw, 16);
3748         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3749                               &px, 4);
3750         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3751                               &vcpu->run->s.regs.fpc, 4);
3752         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3753                               &vcpu->arch.sie_block->todpr, 4);
3754         cputm = kvm_s390_get_cpu_timer(vcpu);
3755         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3756                               &cputm, 8);
3757         clkcomp = vcpu->arch.sie_block->ckc >> 8;
3758         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3759                               &clkcomp, 8);
3760         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3761                               &vcpu->run->s.regs.acrs, 64);
3762         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3763                               &vcpu->arch.sie_block->gcr, 128);
3764         return rc ? -EFAULT : 0;
3765 }
3766
3767 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3768 {
3769         /*
3770          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3771          * switch in the run ioctl. Let's update our copies before we save
3772          * it into the save area
3773          */
3774         save_fpu_regs();
3775         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3776         save_access_regs(vcpu->run->s.regs.acrs);
3777
3778         return kvm_s390_store_status_unloaded(vcpu, addr);
3779 }
3780
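/*
 * IBS is only beneficial while a single VCPU is in the started state, so it
 * is enabled when only one running VCPU remains and dropped again on all
 * VCPUs as soon as a second one is started (see kvm_s390_vcpu_start/stop).
 */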
3781 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3782 {
3783         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3784         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3785 }
3786
3787 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3788 {
3789         unsigned int i;
3790         struct kvm_vcpu *vcpu;
3791
3792         kvm_for_each_vcpu(i, vcpu, kvm) {
3793                 __disable_ibs_on_vcpu(vcpu);
3794         }
3795 }
3796
3797 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3798 {
3799         if (!sclp.has_ibs)
3800                 return;
3801         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3802         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3803 }
3804
3805 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3806 {
3807         int i, online_vcpus, started_vcpus = 0;
3808
3809         if (!is_vcpu_stopped(vcpu))
3810                 return;
3811
3812         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3813         /* Only one cpu at a time may enter/leave the STOPPED state. */
3814         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3815         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3816
3817         for (i = 0; i < online_vcpus; i++) {
3818                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3819                         started_vcpus++;
3820         }
3821
3822         if (started_vcpus == 0) {
3823                 /* we're the only active VCPU -> speed it up */
3824                 __enable_ibs_on_vcpu(vcpu);
3825         } else if (started_vcpus == 1) {
3826                 /*
3827                  * As we are starting a second VCPU, we have to disable
3828                  * the IBS facility on all VCPUs to remove potentially
3829                  * outstanding ENABLE requests.
3830                  */
3831                 __disable_ibs_on_all_vcpus(vcpu->kvm);
3832         }
3833
3834         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
3835         /*
3836          * Another VCPU might have used IBS while we were offline.
3837          * Let's play safe and flush the VCPU at startup.
3838          */
3839         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3840         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3841         return;
3842 }
3843
3844 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3845 {
3846         int i, online_vcpus, started_vcpus = 0;
3847         struct kvm_vcpu *started_vcpu = NULL;
3848
3849         if (is_vcpu_stopped(vcpu))
3850                 return;
3851
3852         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3853         /* Only one cpu at a time may enter/leave the STOPPED state. */
3854         spin_lock(&vcpu->kvm->arch.start_stop_lock);
3855         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3856
3857         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3858         kvm_s390_clear_stop_irq(vcpu);
3859
3860         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
3861         __disable_ibs_on_vcpu(vcpu);
3862
3863         for (i = 0; i < online_vcpus; i++) {
3864                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3865                         started_vcpus++;
3866                         started_vcpu = vcpu->kvm->vcpus[i];
3867                 }
3868         }
3869
3870         if (started_vcpus == 1) {
3871                 /*
3872                  * As we only have one VCPU left, we want to enable the
3873                  * IBS facility for that VCPU to speed it up.
3874                  */
3875                 __enable_ibs_on_vcpu(started_vcpu);
3876         }
3877
3878         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3879         return;
3880 }
3881
3882 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3883                                      struct kvm_enable_cap *cap)
3884 {
3885         int r;
3886
3887         if (cap->flags)
3888                 return -EINVAL;
3889
3890         switch (cap->cap) {
3891         case KVM_CAP_S390_CSS_SUPPORT:
3892                 if (!vcpu->kvm->arch.css_support) {
3893                         vcpu->kvm->arch.css_support = 1;
3894                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3895                         trace_kvm_s390_enable_css(vcpu->kvm);
3896                 }
3897                 r = 0;
3898                 break;
3899         default:
3900                 r = -EINVAL;
3901                 break;
3902         }
3903         return r;
3904 }
3905
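/*
 * Handler for the KVM_S390_MEM_OP vcpu ioctl: read or write guest logical
 * memory, optionally only checking accessibility. A minimal userspace
 * sketch (vcpu_fd and buf are hypothetical, not part of this file):
 *
 *	struct kvm_s390_mem_op mop = {
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.gaddr = 0x1000,
 *		.buf   = (__u64)buf,
 *		.size  = 512,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
 */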
3906 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3907                                   struct kvm_s390_mem_op *mop)
3908 {
3909         void __user *uaddr = (void __user *)mop->buf;
3910         void *tmpbuf = NULL;
3911         int r, srcu_idx;
3912         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3913                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3914
3915         if (mop->flags & ~supported_flags)
3916                 return -EINVAL;
3917
3918         if (mop->size > MEM_OP_MAX_SIZE)
3919                 return -E2BIG;
3920
3921         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3922                 tmpbuf = vmalloc(mop->size);
3923                 if (!tmpbuf)
3924                         return -ENOMEM;
3925         }
3926
3927         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3928
3929         switch (mop->op) {
3930         case KVM_S390_MEMOP_LOGICAL_READ:
3931                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3932                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3933                                             mop->size, GACC_FETCH);
3934                         break;
3935                 }
3936                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3937                 if (r == 0) {
3938                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3939                                 r = -EFAULT;
3940                 }
3941                 break;
3942         case KVM_S390_MEMOP_LOGICAL_WRITE:
3943                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3944                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3945                                             mop->size, GACC_STORE);
3946                         break;
3947                 }
3948                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3949                         r = -EFAULT;
3950                         break;
3951                 }
3952                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3953                 break;
3954         default:
3955                 r = -EINVAL;
3956         }
3957
3958         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3959
3960         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3961                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3962
3963         vfree(tmpbuf);
3964         return r;
3965 }
3966
3967 long kvm_arch_vcpu_async_ioctl(struct file *filp,
3968                                unsigned int ioctl, unsigned long arg)
3969 {
3970         struct kvm_vcpu *vcpu = filp->private_data;
3971         void __user *argp = (void __user *)arg;
3972
3973         switch (ioctl) {
3974         case KVM_S390_IRQ: {
3975                 struct kvm_s390_irq s390irq;
3976
3977                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3978                         return -EFAULT;
3979                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3980         }
3981         case KVM_S390_INTERRUPT: {
3982                 struct kvm_s390_interrupt s390int;
3983                 struct kvm_s390_irq s390irq;
3984
3985                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3986                         return -EFAULT;
3987                 if (s390int_to_s390irq(&s390int, &s390irq))
3988                         return -EINVAL;
3989                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
3990         }
3991         }
3992         return -ENOIOCTLCMD;
3993 }
3994
3995 long kvm_arch_vcpu_ioctl(struct file *filp,
3996                          unsigned int ioctl, unsigned long arg)
3997 {
3998         struct kvm_vcpu *vcpu = filp->private_data;
3999         void __user *argp = (void __user *)arg;
4000         int idx;
4001         long r;
4002
4003         vcpu_load(vcpu);
4004
4005         switch (ioctl) {
4006         case KVM_S390_STORE_STATUS:
4007                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4008                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4009                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4010                 break;
4011         case KVM_S390_SET_INITIAL_PSW: {
4012                 psw_t psw;
4013
4014                 r = -EFAULT;
4015                 if (copy_from_user(&psw, argp, sizeof(psw)))
4016                         break;
4017                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4018                 break;
4019         }
4020         case KVM_S390_INITIAL_RESET:
4021                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4022                 break;
4023         case KVM_SET_ONE_REG:
4024         case KVM_GET_ONE_REG: {
4025                 struct kvm_one_reg reg;
4026                 r = -EFAULT;
4027                 if (copy_from_user(&reg, argp, sizeof(reg)))
4028                         break;
4029                 if (ioctl == KVM_SET_ONE_REG)
4030                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4031                 else
4032                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4033                 break;
4034         }
4035 #ifdef CONFIG_KVM_S390_UCONTROL
4036         case KVM_S390_UCAS_MAP: {
4037                 struct kvm_s390_ucas_mapping ucasmap;
4038
4039                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4040                         r = -EFAULT;
4041                         break;
4042                 }
4043
4044                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4045                         r = -EINVAL;
4046                         break;
4047                 }
4048
4049                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4050                                      ucasmap.vcpu_addr, ucasmap.length);
4051                 break;
4052         }
4053         case KVM_S390_UCAS_UNMAP: {
4054                 struct kvm_s390_ucas_mapping ucasmap;
4055
4056                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4057                         r = -EFAULT;
4058                         break;
4059                 }
4060
4061                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4062                         r = -EINVAL;
4063                         break;
4064                 }
4065
4066                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4067                         ucasmap.length);
4068                 break;
4069         }
4070 #endif
4071         case KVM_S390_VCPU_FAULT: {
4072                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4073                 break;
4074         }
4075         case KVM_ENABLE_CAP:
4076         {
4077                 struct kvm_enable_cap cap;
4078                 r = -EFAULT;
4079                 if (copy_from_user(&cap, argp, sizeof(cap)))
4080                         break;
4081                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4082                 break;
4083         }
4084         case KVM_S390_MEM_OP: {
4085                 struct kvm_s390_mem_op mem_op;
4086
4087                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4088                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4089                 else
4090                         r = -EFAULT;
4091                 break;
4092         }
4093         case KVM_S390_SET_IRQ_STATE: {
4094                 struct kvm_s390_irq_state irq_state;
4095
4096                 r = -EFAULT;
4097                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4098                         break;
4099                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4100                     irq_state.len == 0 ||
4101                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4102                         r = -EINVAL;
4103                         break;
4104                 }
4105                 /* do not use irq_state.flags, it will break old QEMUs */
4106                 r = kvm_s390_set_irq_state(vcpu,
4107                                            (void __user *) irq_state.buf,
4108                                            irq_state.len);
4109                 break;
4110         }
4111         case KVM_S390_GET_IRQ_STATE: {
4112                 struct kvm_s390_irq_state irq_state;
4113
4114                 r = -EFAULT;
4115                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4116                         break;
4117                 if (irq_state.len == 0) {
4118                         r = -EINVAL;
4119                         break;
4120                 }
4121                 /* do not use irq_state.flags, it will break old QEMUs */
4122                 r = kvm_s390_get_irq_state(vcpu,
4123                                            (__u8 __user *)  irq_state.buf,
4124                                            irq_state.len);
4125                 break;
4126         }
4127         default:
4128                 r = -ENOTTY;
4129         }
4130
4131         vcpu_put(vcpu);
4132         return r;
4133 }
4134
4135 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4136 {
4137 #ifdef CONFIG_KVM_S390_UCONTROL
4138         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4139                  && (kvm_is_ucontrol(vcpu->kvm))) {
4140                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4141                 get_page(vmf->page);
4142                 return 0;
4143         }
4144 #endif
4145         return VM_FAULT_SIGBUS;
4146 }
4147
4148 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4149                             unsigned long npages)
4150 {
4151         return 0;
4152 }
4153
4154 /* Section: memory related */
4155 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4156                                    struct kvm_memory_slot *memslot,
4157                                    const struct kvm_userspace_memory_region *mem,
4158                                    enum kvm_mr_change change)
4159 {
4160         /* A few sanity checks. Memory slots have to start and end at a
4161            segment boundary (1MB). The memory in userland may be fragmented
4162            into various different vmas. It is okay to mmap() and munmap()
4163            stuff in this slot after doing this call at any time */
4164
4165         if (mem->userspace_addr & 0xffffful)
4166                 return -EINVAL;
4167
4168         if (mem->memory_size & 0xffffful)
4169                 return -EINVAL;
4170
4171         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4172                 return -EINVAL;
4173
4174         return 0;
4175 }
4176
4177 void kvm_arch_commit_memory_region(struct kvm *kvm,
4178                                 const struct kvm_userspace_memory_region *mem,
4179                                 const struct kvm_memory_slot *old,
4180                                 const struct kvm_memory_slot *new,
4181                                 enum kvm_mr_change change)
4182 {
4183         int rc;
4184
4185         /* If the basics of the memslot do not change, we do not want
4186          * to update the gmap. Every update causes several unnecessary
4187          * segment translation exceptions. This is usually handled just
4188          * fine by the normal fault handler + gmap, but it will also
4189          * cause faults on the prefix page of running guest CPUs.
4190          */
4191         if (old->userspace_addr == mem->userspace_addr &&
4192             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4193             old->npages * PAGE_SIZE == mem->memory_size)
4194                 return;
4195
4196         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4197                 mem->guest_phys_addr, mem->memory_size);
4198         if (rc)
4199                 pr_warn("failed to commit memory region\n");
4200         return;
4201 }
4202
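/*
 * Per facility word i, build a mask from the 2-bit value that sclp.hmfai
 * carries for that word; the larger the value, the fewer facility bits
 * remain set. kvm_s390_init() uses it below to filter the host STFLE
 * facility list into kvm_s390_fac_base.
 */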
4203 static inline unsigned long nonhyp_mask(int i)
4204 {
4205         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4206
4207         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4208 }
4209
4210 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4211 {
4212         vcpu->valid_wakeup = false;
4213 }
4214
4215 static int __init kvm_s390_init(void)
4216 {
4217         int i;
4218
4219         if (!sclp.has_sief2) {
4220                 pr_info("SIE not available\n");
4221                 return -ENODEV;
4222         }
4223
4224         if (nested && hpage) {
4225                 pr_info("nested (vSIE) and hpage (huge page backing) can currently not be activated concurrently\n");
4226                 return -EINVAL;
4227         }
4228
4229         for (i = 0; i < 16; i++)
4230                 kvm_s390_fac_base[i] |=
4231                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4232
4233         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4234 }
4235
4236 static void __exit kvm_s390_exit(void)
4237 {
4238         kvm_exit();
4239 }
4240
4241 module_init(kvm_s390_init);
4242 module_exit(kvm_s390_exit);
4243
4244 /*
4245  * Enable autoloading of the kvm module.
4246  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4247  * since x86 takes a different approach.
4248  */
4249 #include <linux/miscdevice.h>
4250 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4251 MODULE_ALIAS("devname:kvm");