// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
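/*
 * Illustrative note: each debugfs_entries row below is a (name, offset,
 * kind) pair.  VCPU_STAT(exit_null), for example, expands to
 *
 *	offsetof(struct kvm_vcpu, stat.exit_null), KVM_STAT_VCPU
 *
 * so the generic KVM debugfs code can read each counter straight out of
 * struct kvm_vcpu (or struct kvm for VM_STAT entries).
 */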
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
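/*
 * Worked example (illustrative): kvm_arch_init_vm() below combines these
 * two arrays with the host facility list, per double word i:
 *
 *	fac_mask[i] = host_stfle[i] & (kvm_s390_fac_base[i] | kvm_s390_fac_ext[i]);
 *	fac_list[i] = host_stfle[i] & kvm_s390_fac_base[i];
 *
 * The mask is the upper bound user space may enable via the cpu model;
 * the list is what a guest gets when user space configures nothing.
 */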
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
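/*
 * Worked example (illustrative): the guest TOD is host TOD + epoch.  If
 * the host clock is stepped forwards by delta = 1000 ticks, each guest
 * would see the same jump, so kvm_clock_sync_scb() adds -1000 to every
 * SIE block epoch.  With the multiple-epoch facility (ECD_MEF), a carry
 * or borrow out of the 64-bit epoch is propagated into the epoch index
 * (epdx), mimicking a 128-bit addition.
 */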
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}
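/*
 * Illustrative usage sketch (error handling omitted): user space probes
 * these capabilities with KVM_CHECK_EXTENSION, e.g.
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
 *	int memop = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A return of 0 means "not supported"; a positive value carries the
 * limit (number of VCPUs, maximum MEM_OP transfer size, ...).
 */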
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
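/*
 * Illustrative usage sketch (error handling omitted): user space fetches
 * and clears the log per memory slot in one call, e.g.
 *
 *	struct kvm_dirty_log log = { .slot = 0, .dirty_bitmap = buf };
 *
 *	ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
 *
 * where buf must provide one bit per page of the slot, rounded up to
 * full longs.
 */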
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
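/*
 * Illustrative usage sketch (error handling omitted): user space opts in
 * before creating any VCPU, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * The facility-backed caps (vector registers, RI, GS, AIS) return -EBUSY
 * once the first VCPU exists, because they patch the cpu model.
 */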
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_s390_vcpu_crypto_setup(vcpu);

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;
	struct kvm_memory_slot *ms;
	/* should be the only one */
	struct kvm_memslots *slots;
	unsigned long ram_pages;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_state)
		return 0;

	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
	if (!mgs)
		return -ENOMEM;
	kvm->arch.migration_state = mgs;

	if (kvm->arch.use_cmma) {
		/*
		 * Get the first slot. They are reverse sorted by base_gfn, so
		 * the first slot is also the one at the end of the address
		 * space. We have verified above that at least one slot is
		 * present.
		 */
		ms = slots->memslots;
		/* round up so we only use full longs */
		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
		/* allocate enough bytes to store all the bits */
		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
		if (!mgs->pgste_bitmap) {
			kfree(mgs);
			kvm->arch.migration_state = NULL;
			return -ENOMEM;
		}

		mgs->bitmap_size = ram_pages;
		atomic64_set(&mgs->dirty_pages, ram_pages);
		/* mark all the pages in active slots as dirty */
		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
			ms = slots->memslots + slotnr;
			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
		}

		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	}
	return 0;
}
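/*
 * Worked example (illustrative): for a guest whose highest slot ends at
 * 4 GiB, ms->base_gfn + ms->npages is 1048576 pages.  That is already a
 * multiple of BITS_PER_LONG (64), so ram_pages stays 1048576 and the
 * PGSTE bitmap costs ram_pages / 8 = 128 KiB of vmalloc space - one bit
 * per guest page, all initially set to "dirty".
 */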
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	struct kvm_s390_migration_state *mgs;

	/* migration mode already disabled */
	if (!kvm->arch.migration_state)
		return 0;
	mgs = kvm->arch.migration_state;
	kvm->arch.migration_state = NULL;

	if (kvm->arch.use_cmma) {
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
		/* We have to wait for the essa emulation to finish */
		synchronize_srcu(&kvm->srcu);
		vfree(mgs->pgste_bitmap);
	}
	kfree(mgs);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = (kvm->arch.migration_state != NULL);

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
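/*
 * Worked example (illustrative): the "gtod->tod < htod.tod" test detects
 * wraparound of the 64-bit addition above.  With htod.tod close to
 * 2^64 - 1 and a positive epoch, the sum wraps to a small value that is
 * less than the original addend, so the carry must be added to the
 * epoch index - exactly like a manual 128-bit addition.
 */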
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
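/*
 * Worked example (illustrative): on s390, KVM_S390_MAX_BIT_DISTANCE is
 * 2 * 8 = 16 bytes.  Every block sent to user space is prefixed with a
 * start gfn and a length (two longs of overhead), so a run of up to 16
 * clean result bytes is cheaper to transmit inline than to end the
 * block and start a new one.
 */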
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
	unsigned long bufsize, hva, pgstev, i, next, cur;
	int srcu_idx, peek, r = 0, rr;
	u8 *res;

	cur = args->start_gfn;
	i = next = pgstev = 0;

	if (unlikely(!kvm->arch.use_cmma))
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !s)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	if (!peek) {
		/* We are not peeking, and there are no dirty pages */
		if (!atomic64_read(&s->dirty_pages)) {
			memset(args, 0, sizeof(*args));
			return 0;
		}
		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
				    args->start_gfn);
		if (cur >= s->bitmap_size)	/* nothing found, loop back */
			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
			memset(args, 0, sizeof(*args));
			return 0;
		}
		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
	}

	res = vmalloc(bufsize);
	if (!res)
		return -ENOMEM;

	args->start_gfn = cur;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < bufsize) {
		hva = gfn_to_hva(kvm, cur);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}
		/* decrement only if we actually flipped the bit to 0 */
		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
			atomic64_dec(&s->dirty_pages);
		r = get_pgste(kvm->mm, hva, &pgstev);
		if (r < 0)
			pgstev = 0;
		/* save the value */
		res[i++] = (pgstev >> 24) & 0x43;
		/*
		 * if the next bit is too far away, stop.
		 * if we reached the previous "next", find the next one
		 */
		if (!peek) {
			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
				break;
			if (cur == next)
				next = find_next_bit(s->pgste_bitmap,
						     s->bitmap_size, cur + 1);
			/* reached the end of the bitmap or of the buffer, stop */
			if ((next >= s->bitmap_size) ||
			    (next >= args->start_gfn + bufsize))
				break;
		}
		cur++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);
	args->count = i;
	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;

	rr = copy_to_user((void __user *)args->values, res, args->count);
	if (rr)
		r = -EFAULT;

	vfree(res);
	return r;
}

/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(sizeof(*bits) * args->count);
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
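/*
 * Illustrative usage sketch (error handling omitted): a migration loop
 * drains the CMMA log roughly like
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buflen,
 *		.values = (__u64) buf,
 *	};
 *
 *	do {
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		(consume log.count values starting at gfn log.start_gfn)
 *		log.start_gfn += log.count;
 *		log.count = buflen;
 *	} while (log.remaining);
 */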
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	int cc = 1;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}

static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}

static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}

int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	kvm_s390_gisa_destroy(kvm);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	if (kvm->arch.migration_state) {
		vfree(kvm->arch.migration_state->pgste_bitmap);
		kfree(kvm->arch.migration_state);
	}
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}

static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}

static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}

static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
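/*
 * Illustrative note: a basic SCA fits in one page and supports
 * KVM_S390_BSCA_CPU_SLOTS (64) VCPUs, while the extended SCA spans
 * several pages and supports KVM_S390_ESCA_CPU_SLOTS (248).  Its origin
 * is 64-byte aligned, which is why the low origin word is masked with
 * ~0x3fU above before being split across the scaoh/scaol fields of each
 * SIE block.
 */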
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}

int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
2317 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2318 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2320 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2321 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2322 vcpu->arch.cputm_start = get_tod_clock_fast();
2323 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2326 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2327 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2329 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2330 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2331 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2332 vcpu->arch.cputm_start = 0;
2333 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
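/*
 * Example (sketch): another thread, e.g. one servicing a
 * KVM_REG_S390_CPU_TIMER one_reg read, may call
 *
 *	__u64 cputm = kvm_s390_get_cpu_timer(vcpu);
 *
 * while the VCPU is running; the retry loop above guarantees that a
 * consistent (cputm, cputm_start) pair is used for the calculation.
 */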
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
					CR0_INTERRUPT_KEY_SUBMASK |
					CR0_MEASUREMENT_ALERT_SUBMASK;
	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
					CR14_UNUSED_33 |
					CR14_EXTERNAL_DAMAGE_SUBMASK;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;

	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa;
	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
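/*
 * A note on the prog20 protocol above: the flags are evaluated on the
 * SIE entry path, so PROG_BLOCK_SIE keeps a VCPU from (re)entering SIE
 * at all, while PROG_REQUEST merely forces a trip through
 * kvm_s390_handle_requests() before the next entry. The setters kick
 * the VCPU out of SIE via exit_sie() so that the bits take effect
 * immediately.
 */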
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
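/*
 * Request handling below is written as a retry loop: every request that
 * is processed jumps back to the retry label, so requests that were made
 * while the current one was being handled are not lost before the next
 * SIE entry.
 */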
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Let's just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);

	return 0;
}
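/*
 * The epoch is the wrapping 64-bit difference between guest and host TOD
 * clock; with the multiple-epoch facility (139) the epoch index (epdx)
 * carries the borrow. Worked example (sketch): gtod->tod = 0x10 and
 * htod.tod = 0x20 give epoch = 0x10 - 0x20 = 0xfffffffffffffff0, which
 * is greater than gtod->tod, so the subtraction wrapped and epdx is
 * decremented by one, as done below.
 */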
void kvm_s390_set_tod_clock(struct kvm *kvm,
			    const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	struct kvm_s390_tod_clock_ext htod;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	kvm->arch.epoch = gtod->tod - htod.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to clean up
	 */
	return true;
}
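/*
 * The pfault handshake with the guest, roughly: when a major host page
 * fault hits and the conditions checked below hold, a PFAULT_INIT
 * external interrupt carrying a guest-provided token is injected and
 * the guest may reschedule the faulting task; once the page is
 * resident, the matching PFAULT_DONE interrupt with the same token is
 * delivered (see the injection helpers above).
 */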
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the housekeeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
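/*
 * The inner run loop below drops kvm->srcu and disables interrupts
 * around the actual SIE entry: guest_enter/exit_irqoff() must be called
 * with interrupts off, and no uaccess may happen in that window because
 * PF_VCPU redirects the host fault handler while the guest runs.
 */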
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when
	 * running the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
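/*
 * sync_regs()/store_regs() implement the lazy register switch between
 * userspace and guest: only state marked dirty by userspace in
 * kvm_dirty_regs is pushed into the SIE block, and the host FPU/vector
 * and guarded-storage context is swapped in place so the return to
 * userspace can restore it lazily.
 */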
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}

	kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	kvm_sigset_deactivate(vcpu);

	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
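/*
 * Note that kvm_s390_store_status_unloaded() expects the guest register
 * values in vcpu->run; while the VCPU is loaded, FPC and the access
 * registers live in the host registers due to the lazy switch in the
 * run ioctl, so the wrapper below refreshes those copies first.
 */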
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (!sclp.has_ibs)
		return;
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
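/*
 * The IBS facility only pays off while exactly one VCPU is running:
 * kvm_s390_vcpu_start() below enables it for the first started VCPU and
 * tears it down on all VCPUs as soon as a second one starts;
 * kvm_s390_vcpu_stop() re-enables it once only one running VCPU is
 * left.
 */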
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
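/*
 * Userspace usage sketch for the mem op above (hypothetical snippet,
 * assuming a vcpu fd and a local buffer; not taken from this file):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buf,
 *	};
 *	ret = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *
 * A positive return value is a program interruption code; with
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION set it is additionally injected
 * into the guest, as done above.
 */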
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;
		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;
		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
		mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
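/*
 * Arithmetic sketch for nonhyp_mask() below: bits 2i..2i+1 of sclp.hmfai
 * select how much of facility doubleword i is reserved to the
 * hypervisor; the result shifts the 48-bit mask right by 16 bits per
 * step, so a field value of 0 keeps 48 facility bits and a field value
 * of 3 keeps none (0x0000ffffffffffffUL >> 48 == 0).
 */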
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");