// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2018
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_io_request", VCPU_STAT(exit_io_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_pei", VCPU_STAT(exit_pei) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program", VCPU_STAT(deliver_program) },
	{ "deliver_io", VCPU_STAT(deliver_io) },
	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "inject_ckc", VCPU_STAT(inject_ckc) },
	{ "inject_cputm", VCPU_STAT(inject_cputm) },
	{ "inject_external_call", VCPU_STAT(inject_external_call) },
	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
	{ "inject_io", VM_STAT(inject_io) },
	{ "inject_mchk", VCPU_STAT(inject_mchk) },
	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
	{ "inject_program", VCPU_STAT(inject_program) },
	{ "inject_restart", VCPU_STAT(inject_restart) },
	{ "inject_service_signal", VM_STAT(inject_service_signal) },
	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
	{ "inject_virtio", VM_STAT(inject_virtio) },
	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
	{ "instruction_gs", VCPU_STAT(instruction_gs) },
	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_sck", VCPU_STAT(instruction_sck) },
	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_iske", VCPU_STAT(instruction_iske) },
	{ "instruction_ri", VCPU_STAT(instruction_ri) },
	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
	{ "instruction_sske", VCPU_STAT(instruction_sske) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tb", VCPU_STAT(instruction_tb) },
	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sie", VCPU_STAT(instruction_sie) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
	{ NULL }
};

struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}

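/*
 * Illustrative sketch (not in the original source): both arrays above are
 * plain bitmaps, so the effective facility mask a VM may use is a word-wise
 * OR of the base facilities and the cpu-model-enabled extension facilities,
 * roughly:
 *
 *	for (i = 0; i < kvm_s390_fac_size(); i++)
 *		mask[i] = kvm_s390_fac_base[i] | kvm_s390_fac_ext[i];
 */
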
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

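/*
 * Worked example (illustrative, not in the original source): the guest TOD
 * is host TOD + epoch. If the host TOD jumps forward by delta = 5, adding
 * -5 to each epoch preserves the sum, and thus the guest-visible clock:
 * (tod + 5) + (epoch - 5) == tod + epoch.
 */
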
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}

static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		rc = -ENOMEM;
		goto out_debug_unreg;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out_debug_unreg;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out_gib_destroy;

	return 0;

out_gib_destroy:
	kvm_s390_gib_destroy();
out_debug_unreg:
	debug_unregister(kvm_s390_dbf);
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

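/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): KVM_S390_ENABLE_SIE is issued on the /dev/kvm fd itself, e.g.
 *
 *	int kvm_fd = open("/dev/kvm", O_RDWR);
 *
 *	ioctl(kvm_fd, KVM_S390_ENABLE_SIE, 0);
 */
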
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	default:
		r = 0;
	}
	return r;
}

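/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): userspace probes these capabilities with KVM_CHECK_EXTENSION,
 * whose return value is the "r" computed above, e.g.
 *
 *	int max = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	// max > 0: KVM_S390_MEM_OP is available, with transfers up to "max"
 */
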
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

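/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): these are VM-wide capabilities, enabled on the VM fd, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */
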
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

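/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): these attributes arrive via the KVM_SET_DEVICE_ATTR vm ioctl,
 * e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
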
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

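/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): migration mode is toggled through the KVM_S390_VM_MIGRATION
 * attribute group, e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
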
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
					   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

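/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): reading the guest TOD in the extended format, e.g.
 *
 *	struct kvm_s390_vm_tod_clock gtod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_EXT,
 *		.addr  = (__u64)(unsigned long)&gtod,
 *	};
 *
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */
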
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
			 data.feat[0],
			 data.feat[1],
			 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}

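/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): storage keys are read and written in bulk via the vm ioctls,
 * e.g.
 *
 *	uint8_t buf[128];
 *	struct kvm_s390_skeys skeys = {
 *		.start_gfn = 0,
 *		.count = sizeof(buf),
 *		.skeydata_addr = (__u64)(unsigned long)buf,
 *	};
 *
 *	ioctl(vm_fd, KVM_S390_GET_SKEYS, &skeys);
 */
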
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}

static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}

static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}

static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}

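/*
 * Illustrative sketch (hypothetical userspace usage, not in the original
 * source): a migration tool would drain the CMMA values roughly like this;
 * the kernel updates start_gfn to the first gfn it actually reported:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = buflen,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *
 *	do {
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		// consume log.count values for gfns starting at log.start_gfn
 *		log.start_gfn += log.count;
 *		log.count = buflen;
 *	} while (log.remaining);
 */
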
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	down_read(&kvm->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&kvm->mm->mmap_sem);

	if (!kvm->mm->context.uses_cmm) {
		down_write(&kvm->mm->mmap_sem);
		kvm->mm->context.uses_cmm = 1;
		up_write(&kvm->mm->mmap_sem);
	}
out:
	vfree(bits);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}

2180 static int kvm_s390_apxa_installed(void)
2182 struct ap_config_info info;
2184 if (ap_instructions_available()) {
2185 if (ap_qci(&info) == 0)
2193 * The format of the crypto control block (CRYCB) is specified in the 3 low
2194 * order bits of the CRYCB designation (CRYCBD) field as follows:
2195 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2196 * AP extended addressing (APXA) facility are installed.
2197 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2198 * Format 2: Both the APXA and MSAX3 facilities are installed.
2200 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2202 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2204 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2205 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2207 /* Check whether MSAX3 is installed */
2208 if (!test_kvm_facility(kvm, 76))
2211 if (kvm_s390_apxa_installed())
2212 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2214 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
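/*
 * Worked example for the format selection above: the CRYCB lives in
 * sie_page2 and is sufficiently aligned, so the low-order bits of its
 * address are free to carry the format. On a machine with facility 76
 * (MSAX3) and APXA, a CRYCB at e.g. 0x12345600 yields a CRYCBD of
 * 0x12345600 | CRYCB_FORMAT2, and the format bits tell SIE whether the
 * APCB0 or APCB1 layout applies.
 */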
2217 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2218 unsigned long *aqm, unsigned long *adm)
2220 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2222 mutex_lock(&kvm->lock);
2223 kvm_s390_vcpu_block_all(kvm);
2225 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2226 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2227 memcpy(crycb->apcb1.apm, apm, 32);
2228 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2229 apm[0], apm[1], apm[2], apm[3]);
2230 memcpy(crycb->apcb1.aqm, aqm, 32);
2231 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2232 aqm[0], aqm[1], aqm[2], aqm[3]);
2233 memcpy(crycb->apcb1.adm, adm, 32);
2234 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2235 adm[0], adm[1], adm[2], adm[3]);
2238 case CRYCB_FORMAT0: /* fall through - both use APCB0 */
2239 memcpy(crycb->apcb0.apm, apm, 8);
2240 memcpy(crycb->apcb0.aqm, aqm, 2);
2241 memcpy(crycb->apcb0.adm, adm, 2);
2242 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2243 apm[0], *((unsigned short *)aqm),
2244 *((unsigned short *)adm));
2246 default: /* cannot happen */
2250 /* recreate the shadow crycb for each vcpu */
2251 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2252 kvm_s390_vcpu_unblock_all(kvm);
2253 mutex_unlock(&kvm->lock);
2255 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2257 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2259 mutex_lock(&kvm->lock);
2260 kvm_s390_vcpu_block_all(kvm);
2262 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2263 sizeof(kvm->arch.crypto.crycb->apcb0));
2264 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2265 sizeof(kvm->arch.crypto.crycb->apcb1));
2267 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2268 /* recreate the shadow crycb for each vcpu */
2269 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2270 kvm_s390_vcpu_unblock_all(kvm);
2271 mutex_unlock(&kvm->lock);
2273 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2275 static u64 kvm_s390_get_initial_cpuid(void)
2280 cpuid.version = 0xff;
2281 return *((u64 *) &cpuid);
2284 static void kvm_s390_crypto_init(struct kvm *kvm)
2286 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2287 kvm_s390_set_crycb_format(kvm);
2289 if (!test_kvm_facility(kvm, 76))
2292 /* Enable AES/DEA protected key functions by default */
2293 kvm->arch.crypto.aes_kw = 1;
2294 kvm->arch.crypto.dea_kw = 1;
2295 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2296 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2297 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2298 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2301 static void sca_dispose(struct kvm *kvm)
2303 if (kvm->arch.use_esca)
2304 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2306 free_page((unsigned long)(kvm->arch.sca));
2307 kvm->arch.sca = NULL;
2310 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2312 gfp_t alloc_flags = GFP_KERNEL;
2314 char debug_name[16];
2315 static unsigned long sca_offset;
2318 #ifdef CONFIG_KVM_S390_UCONTROL
2319 if (type & ~KVM_VM_S390_UCONTROL)
2321 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2328 rc = s390_enable_sie();
2334 if (!sclp.has_64bscao)
2335 alloc_flags |= GFP_DMA;
2336 rwlock_init(&kvm->arch.sca_lock);
2337 /* start with basic SCA */
2338 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2341 spin_lock(&kvm_lock);
2343 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2345 kvm->arch.sca = (struct bsca_block *)
2346 ((char *) kvm->arch.sca + sca_offset);
2347 spin_unlock(&kvm_lock);
2349 sprintf(debug_name, "kvm-%u", current->pid);
2351 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2355 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2356 kvm->arch.sie_page2 =
2357 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2358 if (!kvm->arch.sie_page2)
2361 kvm->arch.sie_page2->kvm = kvm;
2362 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2364 for (i = 0; i < kvm_s390_fac_size(); i++) {
2365 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2366 (kvm_s390_fac_base[i] |
2367 kvm_s390_fac_ext[i]);
2368 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2369 kvm_s390_fac_base[i];
2371 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2373 /* we are always in czam mode - even on pre-z14 machines */
2374 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2375 set_kvm_facility(kvm->arch.model.fac_list, 138);
2376 /* we emulate STHYI in kvm */
2377 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2378 set_kvm_facility(kvm->arch.model.fac_list, 74);
2379 if (MACHINE_HAS_TLB_GUEST) {
2380 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2381 set_kvm_facility(kvm->arch.model.fac_list, 147);
2384 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2385 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2387 kvm_s390_crypto_init(kvm);
2389 mutex_init(&kvm->arch.float_int.ais_lock);
2390 spin_lock_init(&kvm->arch.float_int.lock);
2391 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2392 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2393 init_waitqueue_head(&kvm->arch.ipte_wq);
2394 mutex_init(&kvm->arch.ipte_mutex);
2396 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2397 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2399 if (type & KVM_VM_S390_UCONTROL) {
2400 kvm->arch.gmap = NULL;
2401 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2403 if (sclp.hamax == U64_MAX)
2404 kvm->arch.mem_limit = TASK_SIZE_MAX;
2406 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2408 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2409 if (!kvm->arch.gmap)
2411 kvm->arch.gmap->private = kvm;
2412 kvm->arch.gmap->pfault_enabled = 0;
2415 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2416 kvm->arch.use_skf = sclp.has_skey;
2417 spin_lock_init(&kvm->arch.start_stop_lock);
2418 kvm_s390_vsie_init(kvm);
2419 kvm_s390_gisa_init(kvm);
2420 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2424 free_page((unsigned long)kvm->arch.sie_page2);
2425 debug_unregister(kvm->arch.dbf);
2427 KVM_EVENT(3, "creation of vm failed: %d", rc);
2431 bool kvm_arch_has_vcpu_debugfs(void)
2436 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
2441 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2443 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2444 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2445 kvm_s390_clear_local_irqs(vcpu);
2446 kvm_clear_async_pf_completion_queue(vcpu);
2447 if (!kvm_is_ucontrol(vcpu->kvm))
2450 if (kvm_is_ucontrol(vcpu->kvm))
2451 gmap_remove(vcpu->arch.gmap);
2453 if (vcpu->kvm->arch.use_cmma)
2454 kvm_s390_vcpu_unsetup_cmma(vcpu);
2455 free_page((unsigned long)(vcpu->arch.sie_block));
2457 kvm_vcpu_uninit(vcpu);
2458 kmem_cache_free(kvm_vcpu_cache, vcpu);
2461 static void kvm_free_vcpus(struct kvm *kvm)
2464 struct kvm_vcpu *vcpu;
2466 kvm_for_each_vcpu(i, vcpu, kvm)
2467 kvm_arch_vcpu_destroy(vcpu);
2469 mutex_lock(&kvm->lock);
2470 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2471 kvm->vcpus[i] = NULL;
2473 atomic_set(&kvm->online_vcpus, 0);
2474 mutex_unlock(&kvm->lock);
2477 void kvm_arch_destroy_vm(struct kvm *kvm)
2479 kvm_free_vcpus(kvm);
2481 debug_unregister(kvm->arch.dbf);
2482 kvm_s390_gisa_destroy(kvm);
2483 free_page((unsigned long)kvm->arch.sie_page2);
2484 if (!kvm_is_ucontrol(kvm))
2485 gmap_remove(kvm->arch.gmap);
2486 kvm_s390_destroy_adapters(kvm);
2487 kvm_s390_clear_float_irqs(kvm);
2488 kvm_s390_vsie_destroy(kvm);
2489 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2492 /* Section: vcpu related */
2493 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2495 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2496 if (!vcpu->arch.gmap)
2498 vcpu->arch.gmap->private = vcpu->kvm;
2503 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2505 if (!kvm_s390_use_sca_entries())
2507 read_lock(&vcpu->kvm->arch.sca_lock);
2508 if (vcpu->kvm->arch.use_esca) {
2509 struct esca_block *sca = vcpu->kvm->arch.sca;
2511 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2512 sca->cpu[vcpu->vcpu_id].sda = 0;
2514 struct bsca_block *sca = vcpu->kvm->arch.sca;
2516 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2517 sca->cpu[vcpu->vcpu_id].sda = 0;
2519 read_unlock(&vcpu->kvm->arch.sca_lock);
2522 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2524 if (!kvm_s390_use_sca_entries()) {
2525 struct bsca_block *sca = vcpu->kvm->arch.sca;
2527 /* we still need the basic sca for the ipte control */
2528 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2529 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2532 read_lock(&vcpu->kvm->arch.sca_lock);
2533 if (vcpu->kvm->arch.use_esca) {
2534 struct esca_block *sca = vcpu->kvm->arch.sca;
2536 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2537 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2538 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2539 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2540 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2542 struct bsca_block *sca = vcpu->kvm->arch.sca;
2544 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2545 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2546 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2547 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2549 read_unlock(&vcpu->kvm->arch.sca_lock);
2552 /* Basic SCA to Extended SCA data copy routines */
2553 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2556 d->sigp_ctrl.c = s->sigp_ctrl.c;
2557 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2560 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2564 d->ipte_control = s->ipte_control;
2566 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2567 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2570 static int sca_switch_to_extended(struct kvm *kvm)
2572 struct bsca_block *old_sca = kvm->arch.sca;
2573 struct esca_block *new_sca;
2574 struct kvm_vcpu *vcpu;
2575 unsigned int vcpu_idx;
2578 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2582 scaoh = (u32)((u64)(new_sca) >> 32);
2583 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2585 kvm_s390_vcpu_block_all(kvm);
2586 write_lock(&kvm->arch.sca_lock);
2588 sca_copy_b_to_e(new_sca, old_sca);
2590 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2591 vcpu->arch.sie_block->scaoh = scaoh;
2592 vcpu->arch.sie_block->scaol = scaol;
2593 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2595 kvm->arch.sca = new_sca;
2596 kvm->arch.use_esca = 1;
2598 write_unlock(&kvm->arch.sca_lock);
2599 kvm_s390_vcpu_unblock_all(kvm);
2601 free_page((unsigned long)old_sca);
2603 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2604 old_sca, kvm->arch.sca);
2608 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2612 if (!kvm_s390_use_sca_entries()) {
2613 if (id < KVM_MAX_VCPUS)
2617 if (id < KVM_S390_BSCA_CPU_SLOTS)
2619 if (!sclp.has_esca || !sclp.has_64bscao)
2622 mutex_lock(&kvm->lock);
2623 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2624 mutex_unlock(&kvm->lock);
2626 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
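/*
 * For orientation: a basic SCA has KVM_S390_BSCA_CPU_SLOTS (64) entries,
 * an extended SCA KVM_S390_ESCA_CPU_SLOTS (248). Creating the first vcpu
 * with an id beyond the basic range is what triggers the one-way
 * sca_switch_to_extended() above.
 */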
2629 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2631 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2632 kvm_clear_async_pf_completion_queue(vcpu);
2633 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2639 kvm_s390_set_prefix(vcpu, 0);
2640 if (test_kvm_facility(vcpu->kvm, 64))
2641 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2642 if (test_kvm_facility(vcpu->kvm, 82))
2643 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
2644 if (test_kvm_facility(vcpu->kvm, 133))
2645 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2646 if (test_kvm_facility(vcpu->kvm, 156))
2647 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
2648 /* fprs can be synchronized via vrs, even if the guest has no vx. With
2649 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2652 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2654 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2656 if (kvm_is_ucontrol(vcpu->kvm))
2657 return __kvm_ucontrol_vcpu_init(vcpu);
2662 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2663 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2665 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2666 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2667 vcpu->arch.cputm_start = get_tod_clock_fast();
2668 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2671 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2672 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2674 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2675 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2676 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2677 vcpu->arch.cputm_start = 0;
2678 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2681 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2682 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2684 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2685 vcpu->arch.cputm_enabled = true;
2686 __start_cpu_timer_accounting(vcpu);
2689 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2690 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2692 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2693 __stop_cpu_timer_accounting(vcpu);
2694 vcpu->arch.cputm_enabled = false;
2697 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2699 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2700 __enable_cpu_timer_accounting(vcpu);
2704 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2706 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2707 __disable_cpu_timer_accounting(vcpu);
2711 /* set the cpu timer - may only be called from the VCPU thread itself */
2712 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2714 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2715 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2716 if (vcpu->arch.cputm_enabled)
2717 vcpu->arch.cputm_start = get_tod_clock_fast();
2718 vcpu->arch.sie_block->cputm = cputm;
2719 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2723 /* update and get the cpu timer - can also be called from other VCPU threads */
2724 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2729 if (unlikely(!vcpu->arch.cputm_enabled))
2730 return vcpu->arch.sie_block->cputm;
2732 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2734 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2736 * If the writer would ever execute a read in the critical
2737 * section, e.g. in irq context, we have a deadlock.
2739 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2740 value = vcpu->arch.sie_block->cputm;
2741 /* if cputm_start is 0, accounting is being started/stopped */
2742 if (likely(vcpu->arch.cputm_start))
2743 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2744 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
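/*
 * Note on the retry above: raw_read_seqcount() does not spin for an
 * even (quiescent) count, so the read may start while the writer is
 * active (odd seq); masking the low bit (seq & ~1) then guarantees
 * read_seqcount_retry() reports a mismatch, and the loop simply
 * rereads once the writer is done.
 */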
2749 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2752 gmap_enable(vcpu->arch.enabled_gmap);
2753 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2754 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2755 __start_cpu_timer_accounting(vcpu);
2759 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2762 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2763 __stop_cpu_timer_accounting(vcpu);
2764 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2765 vcpu->arch.enabled_gmap = gmap_get_enabled();
2766 gmap_disable(vcpu->arch.enabled_gmap);
2770 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2772 /* this equals the initial cpu reset in the PoP, but we don't switch to ESA */
2773 vcpu->arch.sie_block->gpsw.mask = 0UL;
2774 vcpu->arch.sie_block->gpsw.addr = 0UL;
2775 kvm_s390_set_prefix(vcpu, 0);
2776 kvm_s390_set_cpu_timer(vcpu, 0);
2777 vcpu->arch.sie_block->ckc = 0UL;
2778 vcpu->arch.sie_block->todpr = 0;
2779 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2780 vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
2781 CR0_INTERRUPT_KEY_SUBMASK |
2782 CR0_MEASUREMENT_ALERT_SUBMASK;
2783 vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2785 CR14_EXTERNAL_DAMAGE_SUBMASK;
2786 /* make sure the new fpc will be lazily loaded */
2788 current->thread.fpu.fpc = 0;
2789 vcpu->arch.sie_block->gbea = 1;
2790 vcpu->arch.sie_block->pp = 0;
2791 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2792 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2793 kvm_clear_async_pf_completion_queue(vcpu);
2794 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2795 kvm_s390_vcpu_stop(vcpu);
2796 kvm_s390_clear_local_irqs(vcpu);
2799 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2801 mutex_lock(&vcpu->kvm->lock);
2803 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2804 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2806 mutex_unlock(&vcpu->kvm->lock);
2807 if (!kvm_is_ucontrol(vcpu->kvm)) {
2808 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2811 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2812 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2813 /* make vcpu_load load the right gmap on the first trigger */
2814 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2817 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2820 * If the AP instructions are not being interpreted and the MSAX3
2821 * facility is not configured for the guest, there is nothing to set up.
2823 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2826 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2827 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2828 vcpu->arch.sie_block->eca &= ~ECA_APIE;
2830 if (vcpu->kvm->arch.crypto.apie)
2831 vcpu->arch.sie_block->eca |= ECA_APIE;
2833 /* Set up protected key support */
2834 if (vcpu->kvm->arch.crypto.aes_kw)
2835 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2836 if (vcpu->kvm->arch.crypto.dea_kw)
2837 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2840 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2842 free_page(vcpu->arch.sie_block->cbrlo);
2843 vcpu->arch.sie_block->cbrlo = 0;
2846 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2848 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2849 if (!vcpu->arch.sie_block->cbrlo)
2854 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2856 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2858 vcpu->arch.sie_block->ibc = model->ibc;
2859 if (test_kvm_facility(vcpu->kvm, 7))
2860 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2863 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2867 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2871 if (test_kvm_facility(vcpu->kvm, 78))
2872 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2873 else if (test_kvm_facility(vcpu->kvm, 8))
2874 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2876 kvm_s390_vcpu_setup_model(vcpu);
2878 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2879 if (MACHINE_HAS_ESOP)
2880 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2881 if (test_kvm_facility(vcpu->kvm, 9))
2882 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2883 if (test_kvm_facility(vcpu->kvm, 73))
2884 vcpu->arch.sie_block->ecb |= ECB_TE;
2886 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2887 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2888 if (test_kvm_facility(vcpu->kvm, 130))
2889 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2890 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2892 vcpu->arch.sie_block->eca |= ECA_CEI;
2894 vcpu->arch.sie_block->eca |= ECA_IB;
2896 vcpu->arch.sie_block->eca |= ECA_SII;
2897 if (sclp.has_sigpif)
2898 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2899 if (test_kvm_facility(vcpu->kvm, 129)) {
2900 vcpu->arch.sie_block->eca |= ECA_VX;
2901 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2903 if (test_kvm_facility(vcpu->kvm, 139))
2904 vcpu->arch.sie_block->ecd |= ECD_MEF;
2905 if (test_kvm_facility(vcpu->kvm, 156))
2906 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2907 if (vcpu->arch.sie_block->gd) {
2908 vcpu->arch.sie_block->eca |= ECA_AIV;
2909 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2910 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2912 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2914 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2917 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2919 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2921 if (vcpu->kvm->arch.use_cmma) {
2922 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2926 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2927 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2929 vcpu->arch.sie_block->hpid = HPID_KVM;
2931 kvm_s390_vcpu_crypto_setup(vcpu);
2936 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2939 struct kvm_vcpu *vcpu;
2940 struct sie_page *sie_page;
2943 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2948 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2952 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2953 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2957 vcpu->arch.sie_block = &sie_page->sie_block;
2958 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2960 /* the real guest size will always be smaller than msl */
2961 vcpu->arch.sie_block->mso = 0;
2962 vcpu->arch.sie_block->msl = sclp.hamax;
2964 vcpu->arch.sie_block->icpua = id;
2965 spin_lock_init(&vcpu->arch.local_int.lock);
2966 vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
2967 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
2968 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
2969 seqcount_init(&vcpu->arch.cputm_seqcount);
2971 rc = kvm_vcpu_init(vcpu, kvm, id);
2973 goto out_free_sie_block;
2974 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2975 vcpu->arch.sie_block);
2976 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2980 free_page((unsigned long)(vcpu->arch.sie_block));
2982 kmem_cache_free(kvm_vcpu_cache, vcpu);
2987 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2989 return kvm_s390_vcpu_has_irq(vcpu, 0);
2992 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2994 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2997 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2999 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3003 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3005 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3008 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3010 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3014 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3016 return atomic_read(&vcpu->arch.sie_block->prog20) &
3017 (PROG_BLOCK_SIE | PROG_REQUEST);
3020 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3022 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3026 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3027 * If the CPU is not running (e.g. waiting as idle) the function will
3028 * return immediately. */
3029 void exit_sie(struct kvm_vcpu *vcpu)
3031 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3032 kvm_s390_vsie_kick(vcpu);
3033 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3037 /* Kick a guest cpu out of SIE to process a request synchronously */
3038 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3040 kvm_make_request(req, vcpu);
3041 kvm_s390_vcpu_request(vcpu);
3044 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3047 struct kvm *kvm = gmap->private;
3048 struct kvm_vcpu *vcpu;
3049 unsigned long prefix;
3052 if (gmap_is_shadow(gmap))
3054 if (start >= 1UL << 31)
3055 /* We are only interested in prefix pages */
3057 kvm_for_each_vcpu(i, vcpu, kvm) {
3058 /* match against both prefix pages */
3059 prefix = kvm_s390_get_prefix(vcpu);
3060 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3061 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3063 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3068 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3070 /* kvm common code refers to this, but never calls it */
3075 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3076 struct kvm_one_reg *reg)
3081 case KVM_REG_S390_TODPR:
3082 r = put_user(vcpu->arch.sie_block->todpr,
3083 (u32 __user *)reg->addr);
3085 case KVM_REG_S390_EPOCHDIFF:
3086 r = put_user(vcpu->arch.sie_block->epoch,
3087 (u64 __user *)reg->addr);
3089 case KVM_REG_S390_CPU_TIMER:
3090 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3091 (u64 __user *)reg->addr);
3093 case KVM_REG_S390_CLOCK_COMP:
3094 r = put_user(vcpu->arch.sie_block->ckc,
3095 (u64 __user *)reg->addr);
3097 case KVM_REG_S390_PFTOKEN:
3098 r = put_user(vcpu->arch.pfault_token,
3099 (u64 __user *)reg->addr);
3101 case KVM_REG_S390_PFCOMPARE:
3102 r = put_user(vcpu->arch.pfault_compare,
3103 (u64 __user *)reg->addr);
3105 case KVM_REG_S390_PFSELECT:
3106 r = put_user(vcpu->arch.pfault_select,
3107 (u64 __user *)reg->addr);
3109 case KVM_REG_S390_PP:
3110 r = put_user(vcpu->arch.sie_block->pp,
3111 (u64 __user *)reg->addr);
3113 case KVM_REG_S390_GBEA:
3114 r = put_user(vcpu->arch.sie_block->gbea,
3115 (u64 __user *)reg->addr);
3124 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3125 struct kvm_one_reg *reg)
3131 case KVM_REG_S390_TODPR:
3132 r = get_user(vcpu->arch.sie_block->todpr,
3133 (u32 __user *)reg->addr);
3135 case KVM_REG_S390_EPOCHDIFF:
3136 r = get_user(vcpu->arch.sie_block->epoch,
3137 (u64 __user *)reg->addr);
3139 case KVM_REG_S390_CPU_TIMER:
3140 r = get_user(val, (u64 __user *)reg->addr);
3142 kvm_s390_set_cpu_timer(vcpu, val);
3144 case KVM_REG_S390_CLOCK_COMP:
3145 r = get_user(vcpu->arch.sie_block->ckc,
3146 (u64 __user *)reg->addr);
3148 case KVM_REG_S390_PFTOKEN:
3149 r = get_user(vcpu->arch.pfault_token,
3150 (u64 __user *)reg->addr);
3151 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3152 kvm_clear_async_pf_completion_queue(vcpu);
3154 case KVM_REG_S390_PFCOMPARE:
3155 r = get_user(vcpu->arch.pfault_compare,
3156 (u64 __user *)reg->addr);
3158 case KVM_REG_S390_PFSELECT:
3159 r = get_user(vcpu->arch.pfault_select,
3160 (u64 __user *)reg->addr);
3162 case KVM_REG_S390_PP:
3163 r = get_user(vcpu->arch.sie_block->pp,
3164 (u64 __user *)reg->addr);
3166 case KVM_REG_S390_GBEA:
3167 r = get_user(vcpu->arch.sie_block->gbea,
3168 (u64 __user *)reg->addr);
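/*
 * Illustrative userspace sketch (not part of this file) of the ONE_REG
 * accessors above, reading the guest CPU timer; "vcpu_fd" is a
 * hypothetical vcpu file descriptor from KVM_CREATE_VCPU.
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int read_cpu_timer(int vcpu_fd, uint64_t *val)
 *	{
 *		struct kvm_one_reg reg = {
 *			.id = KVM_REG_S390_CPU_TIMER,
 *			.addr = (uint64_t)(uintptr_t)val,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 *	}
 */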
3177 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3179 kvm_s390_vcpu_initial_reset(vcpu);
3183 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3186 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3191 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3194 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3199 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3200 struct kvm_sregs *sregs)
3204 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3205 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3211 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3212 struct kvm_sregs *sregs)
3216 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3217 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3223 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3229 if (test_fp_ctl(fpu->fpc)) {
3233 vcpu->run->s.regs.fpc = fpu->fpc;
3235 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3236 (freg_t *) fpu->fprs);
3238 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3245 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3249 /* make sure we have the latest values */
3252 convert_vx_to_fp((freg_t *) fpu->fprs,
3253 (__vector128 *) vcpu->run->s.regs.vrs);
3255 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3256 fpu->fpc = vcpu->run->s.regs.fpc;
3262 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3266 if (!is_vcpu_stopped(vcpu))
3269 vcpu->run->psw_mask = psw.mask;
3270 vcpu->run->psw_addr = psw.addr;
3275 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3276 struct kvm_translation *tr)
3278 return -EINVAL; /* not implemented yet */
3281 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3282 KVM_GUESTDBG_USE_HW_BP | \
3283 KVM_GUESTDBG_ENABLE)
3285 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3286 struct kvm_guest_debug *dbg)
3292 vcpu->guest_debug = 0;
3293 kvm_s390_clear_bp_data(vcpu);
3295 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3299 if (!sclp.has_gpere) {
3304 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3305 vcpu->guest_debug = dbg->control;
3306 /* enforce guest PER */
3307 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3309 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3310 rc = kvm_s390_import_bp_data(vcpu, dbg);
3312 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3313 vcpu->arch.guestdbg.last_bp = 0;
3317 vcpu->guest_debug = 0;
3318 kvm_s390_clear_bp_data(vcpu);
3319 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
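/*
 * Illustrative userspace sketch (not part of this file): enabling
 * single-step debugging through the interface above; "vcpu_fd" is again
 * a hypothetical vcpu file descriptor.
 *
 *	struct kvm_guest_debug dbg = {
 *		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
 *	};
 *
 *	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg) < 0)
 *		perror("KVM_SET_GUEST_DEBUG");
 */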
3327 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3328 struct kvm_mp_state *mp_state)
3334 /* CHECK_STOP and LOAD are not supported yet */
3335 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3336 KVM_MP_STATE_OPERATING;
3342 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3343 struct kvm_mp_state *mp_state)
3349 /* user space knows about this interface - let it control the state */
3350 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3352 switch (mp_state->mp_state) {
3353 case KVM_MP_STATE_STOPPED:
3354 kvm_s390_vcpu_stop(vcpu);
3356 case KVM_MP_STATE_OPERATING:
3357 kvm_s390_vcpu_start(vcpu);
3359 case KVM_MP_STATE_LOAD:
3360 case KVM_MP_STATE_CHECK_STOP:
3361 /* fall through - CHECK_STOP and LOAD are not supported yet */
3370 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3372 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3375 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3378 kvm_s390_vcpu_request_handled(vcpu);
3379 if (!kvm_request_pending(vcpu))
3382 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3383 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3384 * This ensures that the ipte instruction for this request has
3385 * already finished. We might race against a second unmapper that
3386 * wants to set the blocking bit. Let's just retry the request loop.
3388 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3390 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3391 kvm_s390_get_prefix(vcpu),
3392 PAGE_SIZE * 2, PROT_WRITE);
3394 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3400 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3401 vcpu->arch.sie_block->ihcpu = 0xffff;
3405 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3406 if (!ibs_enabled(vcpu)) {
3407 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3408 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3413 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3414 if (ibs_enabled(vcpu)) {
3415 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3416 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3421 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3422 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3426 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3428 * Disable CMM virtualization; we will emulate the ESSA
3429 * instruction manually, in order to provide the additional
3430 * functionality needed for live migration.
3432 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3436 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3438 * Re-enable CMM virtualization if CMMA is available and
3439 * CMM has been used.
3441 if ((vcpu->kvm->arch.use_cmma) &&
3442 (vcpu->kvm->mm->context.uses_cmm))
3443 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3447 /* nothing to do, just clear the request */
3448 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3449 /* we left the vsie handler, nothing to do, just clear the request */
3450 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3455 void kvm_s390_set_tod_clock(struct kvm *kvm,
3456 const struct kvm_s390_vm_tod_clock *gtod)
3458 struct kvm_vcpu *vcpu;
3459 struct kvm_s390_tod_clock_ext htod;
3462 mutex_lock(&kvm->lock);
3465 get_tod_clock_ext((char *)&htod);
3467 kvm->arch.epoch = gtod->tod - htod.tod;
3469 if (test_kvm_facility(kvm, 139)) {
3470 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3471 if (kvm->arch.epoch > gtod->tod)
3472 kvm->arch.epdx -= 1;
3475 kvm_s390_vcpu_block_all(kvm);
3476 kvm_for_each_vcpu(i, vcpu, kvm) {
3477 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3478 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3481 kvm_s390_vcpu_unblock_all(kvm);
3483 mutex_unlock(&kvm->lock);
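/*
 * Worked example for the epoch arithmetic above (mod 2^64): with a
 * requested guest TOD of 0x10 and a host TOD of 0x20, the epoch becomes
 * 0x10 - 0x20 = 0xfffffffffffffff0, i.e. a negative offset. Because the
 * subtraction wrapped while gtod->tod is numerically smaller than the
 * result, the epoch index (epdx) is decremented to carry the borrow
 * into the extended, multiple-epoch TOD.
 */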
3487 * kvm_arch_fault_in_page - fault-in guest page if necessary
3488 * @vcpu: The corresponding virtual cpu
3489 * @gpa: Guest physical address
3490 * @writable: Whether the page should be writable or not
3492 * Make sure that a guest page has been faulted-in on the host.
3494 * Return: Zero on success, negative error code otherwise.
3496 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3498 return gmap_fault(vcpu->arch.gmap, gpa,
3499 writable ? FAULT_FLAG_WRITE : 0);
3502 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3503 unsigned long token)
3505 struct kvm_s390_interrupt inti;
3506 struct kvm_s390_irq irq;
3509 irq.u.ext.ext_params2 = token;
3510 irq.type = KVM_S390_INT_PFAULT_INIT;
3511 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3513 inti.type = KVM_S390_INT_PFAULT_DONE;
3514 inti.parm64 = token;
3515 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3519 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3520 struct kvm_async_pf *work)
3522 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3523 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3526 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3527 struct kvm_async_pf *work)
3529 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3530 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3533 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3534 struct kvm_async_pf *work)
3536 /* s390 will always inject the page directly */
3539 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3542 * s390 will always inject the page directly,
3543 * but we still want check_async_completion to clean up
3548 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3551 struct kvm_arch_async_pf arch;
3554 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3556 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3557 vcpu->arch.pfault_compare)
3559 if (psw_extint_disabled(vcpu))
3561 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3563 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3565 if (!vcpu->arch.gmap->pfault_enabled)
3568 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3569 hva += current->thread.gmap_addr & ~PAGE_MASK;
3570 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3573 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3577 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3582 * On s390 notifications for arriving pages will be delivered directly
3583 * to the guest but the housekeeping for completed pfaults is
3584 * handled outside the worker.
3586 kvm_check_async_pf_completion(vcpu);
3588 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3589 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3594 if (test_cpu_flag(CIF_MCCK_PENDING))
3597 if (!kvm_is_ucontrol(vcpu->kvm)) {
3598 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3603 rc = kvm_s390_handle_requests(vcpu);
3607 if (guestdbg_enabled(vcpu)) {
3608 kvm_s390_backup_guest_per_regs(vcpu);
3609 kvm_s390_patch_guest_per_regs(vcpu);
3612 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3614 vcpu->arch.sie_block->icptcode = 0;
3615 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3616 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3617 trace_kvm_s390_sie_enter(vcpu, cpuflags);
3622 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3624 struct kvm_s390_pgm_info pgm_info = {
3625 .code = PGM_ADDRESSING,
3630 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3631 trace_kvm_s390_sie_fault(vcpu);
3634 * We want to inject an addressing exception, which is defined as a
3635 * suppressing or terminating exception. However, since we came here
3636 * by a DAT access exception, the PSW still points to the faulting
3637 * instruction since DAT exceptions are nullifying. So we've got
3638 * to look up the current opcode to get the length of the instruction
3639 * to be able to forward the PSW.
3641 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3642 ilen = insn_length(opcode);
3646 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3647 * Forward by arbitrary ilc, injection will take care of
3648 * nullification if necessary.
3650 pgm_info = vcpu->arch.pgm;
3653 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3654 kvm_s390_forward_psw(vcpu, ilen);
3655 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3658 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3660 struct mcck_volatile_info *mcck_info;
3661 struct sie_page *sie_page;
3663 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3664 vcpu->arch.sie_block->icptcode);
3665 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3667 if (guestdbg_enabled(vcpu))
3668 kvm_s390_restore_guest_per_regs(vcpu);
3670 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3671 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3673 if (exit_reason == -EINTR) {
3674 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3675 sie_page = container_of(vcpu->arch.sie_block,
3676 struct sie_page, sie_block);
3677 mcck_info = &sie_page->mcck_info;
3678 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3682 if (vcpu->arch.sie_block->icptcode > 0) {
3683 int rc = kvm_handle_sie_intercept(vcpu);
3685 if (rc != -EOPNOTSUPP)
3687 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3688 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3689 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3690 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3692 } else if (exit_reason != -EFAULT) {
3693 vcpu->stat.exit_null++;
3695 } else if (kvm_is_ucontrol(vcpu->kvm)) {
3696 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3697 vcpu->run->s390_ucontrol.trans_exc_code =
3698 current->thread.gmap_addr;
3699 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3701 } else if (current->thread.gmap_pfault) {
3702 trace_kvm_s390_major_guest_pfault(vcpu);
3703 current->thread.gmap_pfault = 0;
3704 if (kvm_arch_setup_async_pf(vcpu))
3706 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3708 return vcpu_post_run_fault_in_sie(vcpu);
3711 static int __vcpu_run(struct kvm_vcpu *vcpu)
3713 int rc, exit_reason;
3716 * We try to hold kvm->srcu during most of vcpu_run (except when
3717 * running the guest), so that memslots (and other stuff) are protected
3719 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3722 rc = vcpu_pre_run(vcpu);
3726 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3728 * As PF_VCPU will be used in the fault handler, there must be
3729 * no uaccess between guest_enter and guest_exit.
3731 local_irq_disable();
3732 guest_enter_irqoff();
3733 __disable_cpu_timer_accounting(vcpu);
3735 exit_reason = sie64a(vcpu->arch.sie_block,
3736 vcpu->run->s.regs.gprs);
3737 local_irq_disable();
3738 __enable_cpu_timer_accounting(vcpu);
3739 guest_exit_irqoff();
3741 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3743 rc = vcpu_post_run(vcpu, exit_reason);
3744 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3746 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3750 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3752 struct runtime_instr_cb *riccb;
3755 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3756 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3757 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3758 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3759 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3760 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3761 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3762 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3763 /* some control register changes require a tlb flush */
3764 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3766 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3767 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3768 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3769 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3770 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3771 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3773 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3774 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3775 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3776 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3777 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3778 kvm_clear_async_pf_completion_queue(vcpu);
3781 * If userspace sets the riccb (e.g. after migration) to a valid state,
3782 * we should enable RI here instead of doing the lazy enablement.
3784 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3785 test_kvm_facility(vcpu->kvm, 64) &&
3787 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3788 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3789 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3792 * If userspace sets the gscb (e.g. after migration) to non-zero,
3793 * we should enable GS here instead of doing the lazy enablement.
3795 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3796 test_kvm_facility(vcpu->kvm, 133) &&
3798 !vcpu->arch.gs_enabled) {
3799 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3800 vcpu->arch.sie_block->ecb |= ECB_GS;
3801 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3802 vcpu->arch.gs_enabled = 1;
3804 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3805 test_kvm_facility(vcpu->kvm, 82)) {
3806 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3807 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3809 save_access_regs(vcpu->arch.host_acrs);
3810 restore_access_regs(vcpu->run->s.regs.acrs);
3811 /* save host (userspace) fprs/vrs */
3813 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3814 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3816 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3818 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3819 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3820 if (test_fp_ctl(current->thread.fpu.fpc))
3821 /* User space provided an invalid FPC, let's clear it */
3822 current->thread.fpu.fpc = 0;
3823 if (MACHINE_HAS_GS) {
3825 __ctl_set_bit(2, 4);
3826 if (current->thread.gs_cb) {
3827 vcpu->arch.host_gscb = current->thread.gs_cb;
3828 save_gs_cb(vcpu->arch.host_gscb);
3830 if (vcpu->arch.gs_enabled) {
3831 current->thread.gs_cb = (struct gs_cb *)
3832 &vcpu->run->s.regs.gscb;
3833 restore_gs_cb(current->thread.gs_cb);
3837 /* SIE will load etoken directly from SDNX and therefore kvm_run */
3839 kvm_run->kvm_dirty_regs = 0;
3842 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3844 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3845 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3846 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3847 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3848 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3849 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3850 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3851 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3852 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3853 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3854 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3855 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3856 kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3857 save_access_regs(vcpu->run->s.regs.acrs);
3858 restore_access_regs(vcpu->arch.host_acrs);
3859 /* Save guest register state */
3861 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3862 /* Restore will be done lazily at return */
3863 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3864 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3865 if (MACHINE_HAS_GS) {
3866 __ctl_set_bit(2, 4);
3867 if (vcpu->arch.gs_enabled)
3868 save_gs_cb(current->thread.gs_cb);
3870 current->thread.gs_cb = vcpu->arch.host_gscb;
3871 restore_gs_cb(vcpu->arch.host_gscb);
3873 if (!vcpu->arch.host_gscb)
3874 __ctl_clear_bit(2, 4);
3875 vcpu->arch.host_gscb = NULL;
3877 /* SIE will save etoken directly into SDNX and therefore kvm_run */
3880 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3884 if (kvm_run->immediate_exit)
3889 if (guestdbg_exit_pending(vcpu)) {
3890 kvm_s390_prepare_debug_exit(vcpu);
3895 kvm_sigset_activate(vcpu);
3897 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3898 kvm_s390_vcpu_start(vcpu);
3899 } else if (is_vcpu_stopped(vcpu)) {
3900 pr_err_ratelimited("can't run stopped vcpu %d\n",
3906 sync_regs(vcpu, kvm_run);
3907 enable_cpu_timer_accounting(vcpu);
3910 rc = __vcpu_run(vcpu);
3912 if (signal_pending(current) && !rc) {
3913 kvm_run->exit_reason = KVM_EXIT_INTR;
3917 if (guestdbg_exit_pending(vcpu) && !rc) {
3918 kvm_s390_prepare_debug_exit(vcpu);
3922 if (rc == -EREMOTE) {
3923 /* userspace support is needed, kvm_run has been prepared */
3927 disable_cpu_timer_accounting(vcpu);
3928 store_regs(vcpu, kvm_run);
3930 kvm_sigset_deactivate(vcpu);
3932 vcpu->stat.exit_userspace++;
3939 * store status at address
3940 * we have two special cases:
3941 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3942 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3944 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3946 unsigned char archmode = 1;
3947 freg_t fprs[NUM_FPRS];
3952 px = kvm_s390_get_prefix(vcpu);
3953 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3954 if (write_guest_abs(vcpu, 163, &archmode, 1))
3957 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3958 if (write_guest_real(vcpu, 163, &archmode, 1))
3962 gpa -= __LC_FPREGS_SAVE_AREA;
3964 /* manually convert vector registers if necessary */
3965 if (MACHINE_HAS_VX) {
3966 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3967 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3970 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3971 vcpu->run->s.regs.fprs, 128);
3973 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3974 vcpu->run->s.regs.gprs, 128);
3975 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3976 &vcpu->arch.sie_block->gpsw, 16);
3977 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3979 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3980 &vcpu->run->s.regs.fpc, 4);
3981 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3982 &vcpu->arch.sie_block->todpr, 4);
3983 cputm = kvm_s390_get_cpu_timer(vcpu);
3984 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3986 clkcomp = vcpu->arch.sie_block->ckc >> 8;
3987 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3989 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3990 &vcpu->run->s.regs.acrs, 64);
3991 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3992 &vcpu->arch.sie_block->gcr, 128);
3993 return rc ? -EFAULT : 0;
3996 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3999 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4000 * switch in the run ioctl. Let's update our copies before we save
4001 * them into the save area.
4004 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4005 save_access_regs(vcpu->run->s.regs.acrs);
4007 return kvm_s390_store_status_unloaded(vcpu, addr);
4010 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4012 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4013 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4016 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4019 struct kvm_vcpu *vcpu;
4021 kvm_for_each_vcpu(i, vcpu, kvm) {
4022 __disable_ibs_on_vcpu(vcpu);
4026 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4030 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4031 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4034 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4036 int i, online_vcpus, started_vcpus = 0;
4038 if (!is_vcpu_stopped(vcpu))
4041 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4042 /* Only one cpu at a time may enter/leave the STOPPED state. */
4043 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4044 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4046 for (i = 0; i < online_vcpus; i++) {
4047 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4051 if (started_vcpus == 0) {
4052 /* we're the only active VCPU -> speed it up */
4053 __enable_ibs_on_vcpu(vcpu);
4054 } else if (started_vcpus == 1) {
4056 * As we are starting a second VCPU, we have to disable
4057 * the IBS facility on all VCPUs to remove potentially
4058 * outstanding ENABLE requests.
4060 __disable_ibs_on_all_vcpus(vcpu->kvm);
4063 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4065 * Another VCPU might have used IBS while we were offline.
4066 * Let's play safe and flush the VCPU at startup.
4068 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4069 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4073 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4075 int i, online_vcpus, started_vcpus = 0;
4076 struct kvm_vcpu *started_vcpu = NULL;
4078 if (is_vcpu_stopped(vcpu))
4081 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4082 /* Only one cpu at a time may enter/leave the STOPPED state. */
4083 spin_lock(&vcpu->kvm->arch.start_stop_lock);
4084 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4086 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4087 kvm_s390_clear_stop_irq(vcpu);
4089 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4090 __disable_ibs_on_vcpu(vcpu);
4092 for (i = 0; i < online_vcpus; i++) {
4093 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4095 started_vcpu = vcpu->kvm->vcpus[i];
4099 if (started_vcpus == 1) {
4101 * As we only have one VCPU left, we want to enable the
4102 * IBS facility for that VCPU to speed it up.
4104 __enable_ibs_on_vcpu(started_vcpu);
4107 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4111 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4112 struct kvm_enable_cap *cap)
4120 case KVM_CAP_S390_CSS_SUPPORT:
4121 if (!vcpu->kvm->arch.css_support) {
4122 vcpu->kvm->arch.css_support = 1;
4123 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4124 trace_kvm_s390_enable_css(vcpu->kvm);
4135 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4136 struct kvm_s390_mem_op *mop)
4138 void __user *uaddr = (void __user *)mop->buf;
4139 void *tmpbuf = NULL;
4141 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4142 | KVM_S390_MEMOP_F_CHECK_ONLY;
4144 if (mop->flags & ~supported_flags)
4147 if (mop->size > MEM_OP_MAX_SIZE)
4150 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4151 tmpbuf = vmalloc(mop->size);
4156 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4159 case KVM_S390_MEMOP_LOGICAL_READ:
4160 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4161 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4162 mop->size, GACC_FETCH);
4165 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4167 if (copy_to_user(uaddr, tmpbuf, mop->size))
4171 case KVM_S390_MEMOP_LOGICAL_WRITE:
4172 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4173 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4174 mop->size, GACC_STORE);
4177 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4181 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4187 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4189 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4190 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
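/*
 * Illustrative userspace sketch (not part of this file) of the mem_op
 * handler above: read "len" bytes from guest logical address "gaddr"
 * through access register 0. "vcpu_fd" is a hypothetical vcpu file
 * descriptor.
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kvm.h>
 *
 *	static int read_guest_mem(int vcpu_fd, uint64_t gaddr,
 *				  void *buf, uint32_t len)
 *	{
 *		struct kvm_s390_mem_op op = {
 *			.gaddr = gaddr,
 *			.size = len,
 *			.op = KVM_S390_MEMOP_LOGICAL_READ,
 *			.buf = (uint64_t)(uintptr_t)buf,
 *			.ar = 0,
 *		};
 *
 *		return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 *	}
 */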
4196 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4197 unsigned int ioctl, unsigned long arg)
4199 struct kvm_vcpu *vcpu = filp->private_data;
4200 void __user *argp = (void __user *)arg;
4203 case KVM_S390_IRQ: {
4204 struct kvm_s390_irq s390irq;
4206 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4208 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4210 case KVM_S390_INTERRUPT: {
4211 struct kvm_s390_interrupt s390int;
4212 struct kvm_s390_irq s390irq;
4214 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4216 if (s390int_to_s390irq(&s390int, &s390irq))
4218 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4221 return -ENOIOCTLCMD;
4224 long kvm_arch_vcpu_ioctl(struct file *filp,
4225 unsigned int ioctl, unsigned long arg)
4227 struct kvm_vcpu *vcpu = filp->private_data;
4228 void __user *argp = (void __user *)arg;
4235 case KVM_S390_STORE_STATUS:
4236 idx = srcu_read_lock(&vcpu->kvm->srcu);
4237 r = kvm_s390_vcpu_store_status(vcpu, arg);
4238 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4240 case KVM_S390_SET_INITIAL_PSW: {
4244 if (copy_from_user(&psw, argp, sizeof(psw)))
4246 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4249 case KVM_S390_INITIAL_RESET:
4250 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4252 case KVM_SET_ONE_REG:
4253 case KVM_GET_ONE_REG: {
4254 struct kvm_one_reg reg;
4256 if (copy_from_user(&reg, argp, sizeof(reg)))
4258 if (ioctl == KVM_SET_ONE_REG)
4259 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4261 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4264 #ifdef CONFIG_KVM_S390_UCONTROL
4265 case KVM_S390_UCAS_MAP: {
4266 struct kvm_s390_ucas_mapping ucasmap;
4268 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4273 if (!kvm_is_ucontrol(vcpu->kvm)) {
4278 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4279 ucasmap.vcpu_addr, ucasmap.length);
4282 case KVM_S390_UCAS_UNMAP: {
4283 struct kvm_s390_ucas_mapping ucasmap;
4285 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4290 if (!kvm_is_ucontrol(vcpu->kvm)) {
4295 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4300 case KVM_S390_VCPU_FAULT: {
4301 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4304 case KVM_ENABLE_CAP:
4306 struct kvm_enable_cap cap;
4308 if (copy_from_user(&cap, argp, sizeof(cap)))
4310 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4313 case KVM_S390_MEM_OP: {
4314 struct kvm_s390_mem_op mem_op;
4316 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4317 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4322 case KVM_S390_SET_IRQ_STATE: {
4323 struct kvm_s390_irq_state irq_state;
4326 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4328 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4329 irq_state.len == 0 ||
4330 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4334 /* do not use irq_state.flags, it will break old QEMUs */
4335 r = kvm_s390_set_irq_state(vcpu,
4336 (void __user *) irq_state.buf,
4340 case KVM_S390_GET_IRQ_STATE: {
4341 struct kvm_s390_irq_state irq_state;
4344 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4346 if (irq_state.len == 0) {
4350 /* do not use irq_state.flags, it will break old QEMUs */
4351 r = kvm_s390_get_irq_state(vcpu,
4352 (__u8 __user *) irq_state.buf,
4364 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4366 #ifdef CONFIG_KVM_S390_UCONTROL
4367 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4368 && (kvm_is_ucontrol(vcpu->kvm))) {
4369 vmf->page = virt_to_page(vcpu->arch.sie_block);
4370 get_page(vmf->page);
4374 return VM_FAULT_SIGBUS;
4377 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4378 unsigned long npages)
4383 /* Section: memory related */
4384 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4385 struct kvm_memory_slot *memslot,
4386 const struct kvm_userspace_memory_region *mem,
4387 enum kvm_mr_change change)
4389 /* A few sanity checks. Memory slots have to start and end at a
4390 segment boundary (1MB). The memory in userland may be fragmented
4391 across various different vmas, and it is okay to mmap() and munmap()
4392 parts of this slot at any time after this call. */
4394 if (mem->userspace_addr & 0xffffful)
4397 if (mem->memory_size & 0xffffful)
4400 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4406 void kvm_arch_commit_memory_region(struct kvm *kvm,
4407 const struct kvm_userspace_memory_region *mem,
4408 const struct kvm_memory_slot *old,
4409 const struct kvm_memory_slot *new,
4410 enum kvm_mr_change change)
4414 /* If the basics of the memslot do not change, we do not want
4415 * to update the gmap. Every update causes several unnecessary
4416 * segment translation exceptions. This is usually handled just
4417 * fine by the normal fault handler + gmap, but it will also
4418 * cause faults on the prefix page of running guest CPUs.
4420 if (old->userspace_addr == mem->userspace_addr &&
4421 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
4422 old->npages * PAGE_SIZE == mem->memory_size)
4425 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4426 mem->guest_phys_addr, mem->memory_size);
4428 pr_warn("failed to commit memory region\n");
4432 static inline unsigned long nonhyp_mask(int i)
4434 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4436 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
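/*
 * Worked example for nonhyp_mask() above: each 2-bit field of sclp.hmfai
 * says how much of the corresponding facility-list doubleword remains
 * visible to guests. For i = 0 and hmfai = 0x40000000 (first field = 1),
 * nonhyp_fai is 1 and the mask is 0x0000ffffffffffff >> 16 =
 * 0x00000000ffffffff, so only the low-order 32 bits of facility
 * doubleword 0 pass through.
 */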
4439 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4441 vcpu->valid_wakeup = false;
4444 static int __init kvm_s390_init(void)
4448 if (!sclp.has_sief2) {
4449 pr_info("SIE is not available\n");
4453 if (nested && hpage) {
4454 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4458 for (i = 0; i < 16; i++)
4459 kvm_s390_fac_base[i] |=
4460 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4462 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4465 static void __exit kvm_s390_exit(void)
4470 module_init(kvm_s390_init);
4471 module_exit(kvm_s390_exit);
4474 * Enable autoloading of the kvm module.
4475 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4476 * since x86 takes a different approach.
4478 #include <linux/miscdevice.h>
4479 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4480 MODULE_ALIAS("devname:kvm");