arch/s390/kvm/kvm-s390.c (linux-2.6-microblaze.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2018
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include "kvm-s390.h"
48 #include "gaccess.h"
49
50 #define CREATE_TRACE_POINTS
51 #include "trace.h"
52 #include "trace-s390.h"
53
54 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
55 #define LOCAL_IRQS 32
56 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
57                            (KVM_MAX_VCPUS + LOCAL_IRQS))
58
59 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
60 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
61
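/*
 * Editor's note (illustrative, not part of the original source): each entry
 * below maps a debugfs file name to the offset of a counter in either
 * struct kvm_vcpu (VCPU_STAT) or struct kvm (VM_STAT).  The generic KVM
 * debugfs code walks this table and exposes the aggregated counters under
 * /sys/kernel/debug/kvm/.
 */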
62 struct kvm_stats_debugfs_item debugfs_entries[] = {
63         { "userspace_handled", VCPU_STAT(exit_userspace) },
64         { "exit_null", VCPU_STAT(exit_null) },
65         { "exit_validity", VCPU_STAT(exit_validity) },
66         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
67         { "exit_external_request", VCPU_STAT(exit_external_request) },
68         { "exit_io_request", VCPU_STAT(exit_io_request) },
69         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
70         { "exit_instruction", VCPU_STAT(exit_instruction) },
71         { "exit_pei", VCPU_STAT(exit_pei) },
72         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
73         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
74         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
75         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
76         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
77         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
78         { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
79         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
80         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
82         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
83         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
84         { "deliver_ckc", VCPU_STAT(deliver_ckc) },
85         { "deliver_cputm", VCPU_STAT(deliver_cputm) },
86         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
87         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
88         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
89         { "deliver_virtio", VCPU_STAT(deliver_virtio) },
90         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
91         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
92         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
93         { "deliver_program", VCPU_STAT(deliver_program) },
94         { "deliver_io", VCPU_STAT(deliver_io) },
95         { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
96         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
97         { "inject_ckc", VCPU_STAT(inject_ckc) },
98         { "inject_cputm", VCPU_STAT(inject_cputm) },
99         { "inject_external_call", VCPU_STAT(inject_external_call) },
100         { "inject_float_mchk", VM_STAT(inject_float_mchk) },
101         { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
102         { "inject_io", VM_STAT(inject_io) },
103         { "inject_mchk", VCPU_STAT(inject_mchk) },
104         { "inject_pfault_done", VM_STAT(inject_pfault_done) },
105         { "inject_program", VCPU_STAT(inject_program) },
106         { "inject_restart", VCPU_STAT(inject_restart) },
107         { "inject_service_signal", VM_STAT(inject_service_signal) },
108         { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
109         { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
110         { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
111         { "inject_virtio", VM_STAT(inject_virtio) },
112         { "instruction_epsw", VCPU_STAT(instruction_epsw) },
113         { "instruction_gs", VCPU_STAT(instruction_gs) },
114         { "instruction_io_other", VCPU_STAT(instruction_io_other) },
115         { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
116         { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
117         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
118         { "instruction_ptff", VCPU_STAT(instruction_ptff) },
119         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
120         { "instruction_sck", VCPU_STAT(instruction_sck) },
121         { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
122         { "instruction_spx", VCPU_STAT(instruction_spx) },
123         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
124         { "instruction_stap", VCPU_STAT(instruction_stap) },
125         { "instruction_iske", VCPU_STAT(instruction_iske) },
126         { "instruction_ri", VCPU_STAT(instruction_ri) },
127         { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
128         { "instruction_sske", VCPU_STAT(instruction_sske) },
129         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
130         { "instruction_essa", VCPU_STAT(instruction_essa) },
131         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
132         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
133         { "instruction_tb", VCPU_STAT(instruction_tb) },
134         { "instruction_tpi", VCPU_STAT(instruction_tpi) },
135         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
136         { "instruction_tsch", VCPU_STAT(instruction_tsch) },
137         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
138         { "instruction_sie", VCPU_STAT(instruction_sie) },
139         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
140         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
141         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
142         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
143         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
144         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
145         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
146         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
147         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
148         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
149         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
150         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
151         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
152         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
153         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
154         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
155         { "instruction_diag_10", VCPU_STAT(diagnose_10) },
156         { "instruction_diag_44", VCPU_STAT(diagnose_44) },
157         { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
158         { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) },
159         { "instruction_diag_258", VCPU_STAT(diagnose_258) },
160         { "instruction_diag_308", VCPU_STAT(diagnose_308) },
161         { "instruction_diag_500", VCPU_STAT(diagnose_500) },
162         { "instruction_diag_other", VCPU_STAT(diagnose_other) },
163         { NULL }
164 };
165
166 struct kvm_s390_tod_clock_ext {
167         __u8 epoch_idx;
168         __u64 tod;
169         __u8 reserved[7];
170 } __packed;
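/*
 * Editor's note (not part of the original source): this appears to mirror the
 * 16-byte extended TOD clock format stored by STORE CLOCK EXTENDED: an 8-bit
 * epoch index, the 64-bit TOD value, and padding.  It is used below to
 * combine the host TOD with the per-VM epoch (and the epoch index when the
 * multiple-epoch facility, stfle bit 139, is available).
 */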
171
172 /* allow nested virtualization in KVM (if enabled by user space) */
173 static int nested;
174 module_param(nested, int, S_IRUGO);
175 MODULE_PARM_DESC(nested, "Nested virtualization support");
176
177 /* allow 1m huge page guest backing, if !nested */
178 static int hpage;
179 module_param(hpage, int, 0444);
180 MODULE_PARM_DESC(hpage, "1m huge page backing support");
181
182 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
183 static u8 halt_poll_max_steal = 10;
184 module_param(halt_poll_max_steal, byte, 0644);
185 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
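/*
 * Editor's note (illustrative usage, not part of the original source): these
 * module parameters are set at load time, e.g.
 *
 *	modprobe kvm nested=1
 *	modprobe kvm hpage=1
 *
 * Per the comments above, 1m huge page backing is only honoured when nested
 * is off.  nested and hpage are read-only (0444) once loaded, while
 * halt_poll_max_steal (0644) can also be changed at runtime via
 * /sys/module/kvm/parameters/halt_poll_max_steal.
 */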
186
187 /*
188  * For now we handle at most 16 double words as this is what the s390 base
189  * kernel handles and stores in the prefix page. If we ever need to go beyond
190  * this, it requires code changes, but the external uapi can stay.
191  */
192 #define SIZE_INTERNAL 16
193
194 /*
195  * Base feature mask that provides the default facility mask. Consists of the
196  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
197  */
198 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
199 /*
200  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
201  * and lists the facilities that can be enabled via a cpu model.
202  */
203 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
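/*
 * Editor's note (not part of the original source): roughly, kvm_s390_fac_base
 * is the set of facilities KVM can virtualize for any guest, while
 * kvm_s390_fac_ext adds facilities that may only be switched on through the
 * CPU-model interface; both are still intersected with what the host itself
 * reports in its stfle facility list.
 */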
204
205 static unsigned long kvm_s390_fac_size(void)
206 {
207         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
208         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
209         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
210                 sizeof(S390_lowcore.stfle_fac_list));
211
212         return SIZE_INTERNAL;
213 }
214
215 /* available cpu features supported by kvm */
216 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
217 /* available subfunctions indicated via query / "test bit" */
218 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
219
220 static struct gmap_notifier gmap_notifier;
221 static struct gmap_notifier vsie_gmap_notifier;
222 debug_info_t *kvm_s390_dbf;
223
224 /* Section: not file related */
225 int kvm_arch_hardware_enable(void)
226 {
227         /* every s390 is virtualization enabled ;-) */
228         return 0;
229 }
230
231 int kvm_arch_check_processor_compat(void)
232 {
233         return 0;
234 }
235
236 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
237                               unsigned long end);
238
239 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
240 {
241         u8 delta_idx = 0;
242
243         /*
244          * The TOD jumps by delta; we compensate for this by adding
245          * -delta to the epoch.
246          */
247         delta = -delta;
248
249         /* sign-extension - we're adding to signed values below */
250         if ((s64)delta < 0)
251                 delta_idx = -1;
252
253         scb->epoch += delta;
254         if (scb->ecd & ECD_MEF) {
255                 scb->epdx += delta_idx;
256                 if (scb->epoch < delta)
257                         scb->epdx += 1;
258         }
259 }
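/*
 * Editor's note (worked example, not part of the original source): if the
 * host TOD is stepped forward by d, the guest-visible TOD must not move, so
 * the epoch is decreased by d (delta = -d is added).  With the
 * multiple-epoch extension, epdx is the high byte of a wider epoch:
 * delta_idx supplies the sign extension of -d and the "epoch < delta" test
 * supplies the carry of the 64-bit addition.  E.g. epoch = 5, d = 2: epoch
 * becomes 3, the addition carries, so epdx += -1 + 1 = 0.  With epoch = 1,
 * d = 2: epoch wraps to ~0UL with no carry, so epdx is decremented, i.e. a
 * borrow into the high part.
 */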
260
261 /*
262  * This callback is executed during stop_machine(). All CPUs are therefore
263  * temporarily stopped. In order not to change guest behavior, we have to
264  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
265  * so a CPU won't be stopped while calculating with the epoch.
266  */
267 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
268                           void *v)
269 {
270         struct kvm *kvm;
271         struct kvm_vcpu *vcpu;
272         int i;
273         unsigned long long *delta = v;
274
275         list_for_each_entry(kvm, &vm_list, vm_list) {
276                 kvm_for_each_vcpu(i, vcpu, kvm) {
277                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
278                         if (i == 0) {
279                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
280                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
281                         }
282                         if (vcpu->arch.cputm_enabled)
283                                 vcpu->arch.cputm_start += *delta;
284                         if (vcpu->arch.vsie_block)
285                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
286                                                    *delta);
287                 }
288         }
289         return NOTIFY_OK;
290 }
291
292 static struct notifier_block kvm_clock_notifier = {
293         .notifier_call = kvm_clock_sync,
294 };
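/*
 * Editor's note (not part of the original source): this notifier is expected
 * to fire from the s390 clock-synchronization (STP) code while all CPUs are
 * held in stop_machine(), with *v carrying the TOD delta that was just
 * applied to the host clock; see the comment above kvm_clock_sync().
 */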
295
296 int kvm_arch_hardware_setup(void)
297 {
298         gmap_notifier.notifier_call = kvm_gmap_notifier;
299         gmap_register_pte_notifier(&gmap_notifier);
300         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
301         gmap_register_pte_notifier(&vsie_gmap_notifier);
302         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
303                                        &kvm_clock_notifier);
304         return 0;
305 }
306
307 void kvm_arch_hardware_unsetup(void)
308 {
309         gmap_unregister_pte_notifier(&gmap_notifier);
310         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
311         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
312                                          &kvm_clock_notifier);
313 }
314
315 static void allow_cpu_feat(unsigned long nr)
316 {
317         set_bit_inv(nr, kvm_s390_available_cpu_feat);
318 }
319
320 static inline int plo_test_bit(unsigned char nr)
321 {
322         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
323         int cc;
324
325         asm volatile(
326                 /* Parameter registers are ignored for "test bit" */
327                 "       plo     0,0,0,0(0)\n"
328                 "       ipm     %0\n"
329                 "       srl     %0,28\n"
330                 : "=d" (cc)
331                 : "d" (r0)
332                 : "cc");
333         return cc == 0;
334 }
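/*
 * Editor's note (not part of the original source): ORing 0x100 into the
 * function code in GR0 selects the "test bit" form of PERFORM LOCKED
 * OPERATION: instead of performing a locked operation, PLO reports via the
 * condition code whether function code nr is installed (cc == 0 means
 * available).  kvm_s390_cpu_feat_init() uses this to build the PLO
 * subfunction bitmap that can be reported to the guest.
 */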
335
336 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
337 {
338         register unsigned long r0 asm("0") = 0; /* query function */
339         register unsigned long r1 asm("1") = (unsigned long) query;
340
341         asm volatile(
342                 /* Parameter regs are ignored */
343                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
344                 :
345                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
346                 : "cc", "memory");
347 }
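/*
 * Editor's note (not part of the original source): like the cpacf query
 * helpers, this runs the given instruction with function code 0 ("query") in
 * GR0 and a parameter block address in GR1; the instruction stores a bitmap
 * of its installed subfunctions at *query.  It is used for SORTL and DFLTCC
 * below.
 */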
348
349 #define INSN_SORTL 0xb938
350 #define INSN_DFLTCC 0xb939
351
352 static void kvm_s390_cpu_feat_init(void)
353 {
354         int i;
355
356         for (i = 0; i < 256; ++i) {
357                 if (plo_test_bit(i))
358                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
359         }
360
361         if (test_facility(28)) /* TOD-clock steering */
362                 ptff(kvm_s390_available_subfunc.ptff,
363                      sizeof(kvm_s390_available_subfunc.ptff),
364                      PTFF_QAF);
365
366         if (test_facility(17)) { /* MSA */
367                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
368                               kvm_s390_available_subfunc.kmac);
369                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
370                               kvm_s390_available_subfunc.kmc);
371                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
372                               kvm_s390_available_subfunc.km);
373                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
374                               kvm_s390_available_subfunc.kimd);
375                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
376                               kvm_s390_available_subfunc.klmd);
377         }
378         if (test_facility(76)) /* MSA3 */
379                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
380                               kvm_s390_available_subfunc.pckmo);
381         if (test_facility(77)) { /* MSA4 */
382                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
383                               kvm_s390_available_subfunc.kmctr);
384                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
385                               kvm_s390_available_subfunc.kmf);
386                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
387                               kvm_s390_available_subfunc.kmo);
388                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
389                               kvm_s390_available_subfunc.pcc);
390         }
391         if (test_facility(57)) /* MSA5 */
392                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
393                               kvm_s390_available_subfunc.ppno);
394
395         if (test_facility(146)) /* MSA8 */
396                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
397                               kvm_s390_available_subfunc.kma);
398
399         if (test_facility(155)) /* MSA9 */
400                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
401                               kvm_s390_available_subfunc.kdsa);
402
403         if (test_facility(150)) /* SORTL */
404                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
405
406         if (test_facility(151)) /* DFLTCC */
407                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
408
409         if (MACHINE_HAS_ESOP)
410                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
411         /*
412          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
413          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
414          */
415         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
416             !test_facility(3) || !nested)
417                 return;
418         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
419         if (sclp.has_64bscao)
420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
421         if (sclp.has_siif)
422                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
423         if (sclp.has_gpere)
424                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
425         if (sclp.has_gsls)
426                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
427         if (sclp.has_ib)
428                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
429         if (sclp.has_cei)
430                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
431         if (sclp.has_ibs)
432                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
433         if (sclp.has_kss)
434                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
435         /*
436          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
437          * all skey handling functions read/set the skey from the PGSTE
438          * instead of the real storage key.
439          *
440          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
441          * pages to be detected as preserved although they are resident.
442          *
443          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
444          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
445          *
446          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
447          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
448          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
449          *
450          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
451          * cannot easily shadow the SCA because of the ipte lock.
452          */
453 }
454
455 int kvm_arch_init(void *opaque)
456 {
457         int rc = -ENOMEM;
458
459         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
460         if (!kvm_s390_dbf)
461                 return -ENOMEM;
462
463         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
464                 goto out;
465
466         kvm_s390_cpu_feat_init();
467
468         /* Register floating interrupt controller interface. */
469         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
470         if (rc) {
471                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
472                 goto out;
473         }
474
475         rc = kvm_s390_gib_init(GAL_ISC);
476         if (rc)
477                 goto out;
478
479         return 0;
480
481 out:
482         kvm_arch_exit();
483         return rc;
484 }
485
486 void kvm_arch_exit(void)
487 {
488         kvm_s390_gib_destroy();
489         debug_unregister(kvm_s390_dbf);
490 }
491
492 /* Section: device related */
493 long kvm_arch_dev_ioctl(struct file *filp,
494                         unsigned int ioctl, unsigned long arg)
495 {
496         if (ioctl == KVM_S390_ENABLE_SIE)
497                 return s390_enable_sie();
498         return -EINVAL;
499 }
500
501 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
502 {
503         int r;
504
505         switch (ext) {
506         case KVM_CAP_S390_PSW:
507         case KVM_CAP_S390_GMAP:
508         case KVM_CAP_SYNC_MMU:
509 #ifdef CONFIG_KVM_S390_UCONTROL
510         case KVM_CAP_S390_UCONTROL:
511 #endif
512         case KVM_CAP_ASYNC_PF:
513         case KVM_CAP_SYNC_REGS:
514         case KVM_CAP_ONE_REG:
515         case KVM_CAP_ENABLE_CAP:
516         case KVM_CAP_S390_CSS_SUPPORT:
517         case KVM_CAP_IOEVENTFD:
518         case KVM_CAP_DEVICE_CTRL:
519         case KVM_CAP_S390_IRQCHIP:
520         case KVM_CAP_VM_ATTRIBUTES:
521         case KVM_CAP_MP_STATE:
522         case KVM_CAP_IMMEDIATE_EXIT:
523         case KVM_CAP_S390_INJECT_IRQ:
524         case KVM_CAP_S390_USER_SIGP:
525         case KVM_CAP_S390_USER_STSI:
526         case KVM_CAP_S390_SKEYS:
527         case KVM_CAP_S390_IRQ_STATE:
528         case KVM_CAP_S390_USER_INSTR0:
529         case KVM_CAP_S390_CMMA_MIGRATION:
530         case KVM_CAP_S390_AIS:
531         case KVM_CAP_S390_AIS_MIGRATION:
532                 r = 1;
533                 break;
534         case KVM_CAP_S390_HPAGE_1M:
535                 r = 0;
536                 if (hpage && !kvm_is_ucontrol(kvm))
537                         r = 1;
538                 break;
539         case KVM_CAP_S390_MEM_OP:
540                 r = MEM_OP_MAX_SIZE;
541                 break;
542         case KVM_CAP_NR_VCPUS:
543         case KVM_CAP_MAX_VCPUS:
544         case KVM_CAP_MAX_VCPU_ID:
545                 r = KVM_S390_BSCA_CPU_SLOTS;
546                 if (!kvm_s390_use_sca_entries())
547                         r = KVM_MAX_VCPUS;
548                 else if (sclp.has_esca && sclp.has_64bscao)
549                         r = KVM_S390_ESCA_CPU_SLOTS;
550                 break;
551         case KVM_CAP_S390_COW:
552                 r = MACHINE_HAS_ESOP;
553                 break;
554         case KVM_CAP_S390_VECTOR_REGISTERS:
555                 r = MACHINE_HAS_VX;
556                 break;
557         case KVM_CAP_S390_RI:
558                 r = test_facility(64);
559                 break;
560         case KVM_CAP_S390_GS:
561                 r = test_facility(133);
562                 break;
563         case KVM_CAP_S390_BPB:
564                 r = test_facility(82);
565                 break;
566         default:
567                 r = 0;
568         }
569         return r;
570 }
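/*
 * Editor's note (illustrative userspace sketch, not part of the original
 * source): these capabilities are probed with KVM_CHECK_EXTENSION, e.g.
 *
 *	int r = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * which returns MEM_OP_MAX_SIZE when the memory-op interface is supported
 * and 0 otherwise; vm_fd is a VM file descriptor obtained via KVM_CREATE_VM.
 */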
571
572 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
573                                     struct kvm_memory_slot *memslot)
574 {
575         int i;
576         gfn_t cur_gfn, last_gfn;
577         unsigned long gaddr, vmaddr;
578         struct gmap *gmap = kvm->arch.gmap;
579         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
580
581         /* Loop over all guest segments */
582         cur_gfn = memslot->base_gfn;
583         last_gfn = memslot->base_gfn + memslot->npages;
584         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
585                 gaddr = gfn_to_gpa(cur_gfn);
586                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
587                 if (kvm_is_error_hva(vmaddr))
588                         continue;
589
590                 bitmap_zero(bitmap, _PAGE_ENTRIES);
591                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
592                 for (i = 0; i < _PAGE_ENTRIES; i++) {
593                         if (test_bit(i, bitmap))
594                                 mark_page_dirty(kvm, cur_gfn + i);
595                 }
596
597                 if (fatal_signal_pending(current))
598                         return;
599                 cond_resched();
600         }
601 }
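/*
 * Editor's note (not part of the original source): each loop iteration above
 * covers one guest segment (_PAGE_ENTRIES 4k pages, i.e. 1 MB).  The dirty
 * bits collected from the gmap/PGSTE level by gmap_sync_dirty_log_pmd() are
 * transferred into the memslot's dirty bitmap via mark_page_dirty(), with a
 * cond_resched() so that large slots do not monopolize the CPU.
 */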
602
603 /* Section: vm related */
604 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
605
606 /*
607  * Get (and clear) the dirty memory log for a memory slot.
608  */
609 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
610                                struct kvm_dirty_log *log)
611 {
612         int r;
613         unsigned long n;
614         struct kvm_memslots *slots;
615         struct kvm_memory_slot *memslot;
616         int is_dirty = 0;
617
618         if (kvm_is_ucontrol(kvm))
619                 return -EINVAL;
620
621         mutex_lock(&kvm->slots_lock);
622
623         r = -EINVAL;
624         if (log->slot >= KVM_USER_MEM_SLOTS)
625                 goto out;
626
627         slots = kvm_memslots(kvm);
628         memslot = id_to_memslot(slots, log->slot);
629         r = -ENOENT;
630         if (!memslot->dirty_bitmap)
631                 goto out;
632
633         kvm_s390_sync_dirty_log(kvm, memslot);
634         r = kvm_get_dirty_log(kvm, log, &is_dirty);
635         if (r)
636                 goto out;
637
638         /* Clear the dirty log */
639         if (is_dirty) {
640                 n = kvm_dirty_bitmap_bytes(memslot);
641                 memset(memslot->dirty_bitmap, 0, n);
642         }
643         r = 0;
644 out:
645         mutex_unlock(&kvm->slots_lock);
646         return r;
647 }
648
649 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
650 {
651         unsigned int i;
652         struct kvm_vcpu *vcpu;
653
654         kvm_for_each_vcpu(i, vcpu, kvm) {
655                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
656         }
657 }
658
659 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
660 {
661         int r;
662
663         if (cap->flags)
664                 return -EINVAL;
665
666         switch (cap->cap) {
667         case KVM_CAP_S390_IRQCHIP:
668                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
669                 kvm->arch.use_irqchip = 1;
670                 r = 0;
671                 break;
672         case KVM_CAP_S390_USER_SIGP:
673                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
674                 kvm->arch.user_sigp = 1;
675                 r = 0;
676                 break;
677         case KVM_CAP_S390_VECTOR_REGISTERS:
678                 mutex_lock(&kvm->lock);
679                 if (kvm->created_vcpus) {
680                         r = -EBUSY;
681                 } else if (MACHINE_HAS_VX) {
682                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
683                         set_kvm_facility(kvm->arch.model.fac_list, 129);
684                         if (test_facility(134)) {
685                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
686                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
687                         }
688                         if (test_facility(135)) {
689                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
690                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
691                         }
692                         if (test_facility(148)) {
693                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
694                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
695                         }
696                         if (test_facility(152)) {
697                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
698                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
699                         }
700                         r = 0;
701                 } else
702                         r = -EINVAL;
703                 mutex_unlock(&kvm->lock);
704                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
705                          r ? "(not available)" : "(success)");
706                 break;
707         case KVM_CAP_S390_RI:
708                 r = -EINVAL;
709                 mutex_lock(&kvm->lock);
710                 if (kvm->created_vcpus) {
711                         r = -EBUSY;
712                 } else if (test_facility(64)) {
713                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
714                         set_kvm_facility(kvm->arch.model.fac_list, 64);
715                         r = 0;
716                 }
717                 mutex_unlock(&kvm->lock);
718                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
719                          r ? "(not available)" : "(success)");
720                 break;
721         case KVM_CAP_S390_AIS:
722                 mutex_lock(&kvm->lock);
723                 if (kvm->created_vcpus) {
724                         r = -EBUSY;
725                 } else {
726                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
727                         set_kvm_facility(kvm->arch.model.fac_list, 72);
728                         r = 0;
729                 }
730                 mutex_unlock(&kvm->lock);
731                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
732                          r ? "(not available)" : "(success)");
733                 break;
734         case KVM_CAP_S390_GS:
735                 r = -EINVAL;
736                 mutex_lock(&kvm->lock);
737                 if (kvm->created_vcpus) {
738                         r = -EBUSY;
739                 } else if (test_facility(133)) {
740                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
741                         set_kvm_facility(kvm->arch.model.fac_list, 133);
742                         r = 0;
743                 }
744                 mutex_unlock(&kvm->lock);
745                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
746                          r ? "(not available)" : "(success)");
747                 break;
748         case KVM_CAP_S390_HPAGE_1M:
749                 mutex_lock(&kvm->lock);
750                 if (kvm->created_vcpus)
751                         r = -EBUSY;
752                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
753                         r = -EINVAL;
754                 else {
755                         r = 0;
756                         down_write(&kvm->mm->mmap_sem);
757                         kvm->mm->context.allow_gmap_hpage_1m = 1;
758                         up_write(&kvm->mm->mmap_sem);
759                         /*
760                          * We might have to create fake 4k page
761                          * tables. To prevent the hardware from working on
762                          * stale PGSTEs, we emulate these instructions.
763                          */
764                         kvm->arch.use_skf = 0;
765                         kvm->arch.use_pfmfi = 0;
766                 }
767                 mutex_unlock(&kvm->lock);
768                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
769                          r ? "(not available)" : "(success)");
770                 break;
771         case KVM_CAP_S390_USER_STSI:
772                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
773                 kvm->arch.user_stsi = 1;
774                 r = 0;
775                 break;
776         case KVM_CAP_S390_USER_INSTR0:
777                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
778                 kvm->arch.user_instr0 = 1;
779                 icpt_operexc_on_all_vcpus(kvm);
780                 r = 0;
781                 break;
782         default:
783                 r = -EINVAL;
784                 break;
785         }
786         return r;
787 }
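/*
 * Editor's note (illustrative userspace sketch, not part of the original
 * source): VM-wide capabilities are switched on with KVM_ENABLE_CAP on the
 * VM file descriptor, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Capabilities that change the CPU model (vector, RI, GS, ...) must be
 * enabled before the first VCPU is created, as enforced by the
 * kvm->created_vcpus checks above.
 */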
788
789 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
790 {
791         int ret;
792
793         switch (attr->attr) {
794         case KVM_S390_VM_MEM_LIMIT_SIZE:
795                 ret = 0;
796                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
797                          kvm->arch.mem_limit);
798                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
799                         ret = -EFAULT;
800                 break;
801         default:
802                 ret = -ENXIO;
803                 break;
804         }
805         return ret;
806 }
807
808 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
809 {
810         int ret;
811         unsigned int idx;
812         switch (attr->attr) {
813         case KVM_S390_VM_MEM_ENABLE_CMMA:
814                 ret = -ENXIO;
815                 if (!sclp.has_cmma)
816                         break;
817
818                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
819                 mutex_lock(&kvm->lock);
820                 if (kvm->created_vcpus)
821                         ret = -EBUSY;
822                 else if (kvm->mm->context.allow_gmap_hpage_1m)
823                         ret = -EINVAL;
824                 else {
825                         kvm->arch.use_cmma = 1;
826                         /* Not compatible with cmma. */
827                         kvm->arch.use_pfmfi = 0;
828                         ret = 0;
829                 }
830                 mutex_unlock(&kvm->lock);
831                 break;
832         case KVM_S390_VM_MEM_CLR_CMMA:
833                 ret = -ENXIO;
834                 if (!sclp.has_cmma)
835                         break;
836                 ret = -EINVAL;
837                 if (!kvm->arch.use_cmma)
838                         break;
839
840                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
841                 mutex_lock(&kvm->lock);
842                 idx = srcu_read_lock(&kvm->srcu);
843                 s390_reset_cmma(kvm->arch.gmap->mm);
844                 srcu_read_unlock(&kvm->srcu, idx);
845                 mutex_unlock(&kvm->lock);
846                 ret = 0;
847                 break;
848         case KVM_S390_VM_MEM_LIMIT_SIZE: {
849                 unsigned long new_limit;
850
851                 if (kvm_is_ucontrol(kvm))
852                         return -EINVAL;
853
854                 if (get_user(new_limit, (u64 __user *)attr->addr))
855                         return -EFAULT;
856
857                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
858                     new_limit > kvm->arch.mem_limit)
859                         return -E2BIG;
860
861                 if (!new_limit)
862                         return -EINVAL;
863
864                 /* gmap_create takes last usable address */
865                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
866                         new_limit -= 1;
867
868                 ret = -EBUSY;
869                 mutex_lock(&kvm->lock);
870                 if (!kvm->created_vcpus) {
871                         /* gmap_create will round the limit up */
872                         struct gmap *new = gmap_create(current->mm, new_limit);
873
874                         if (!new) {
875                                 ret = -ENOMEM;
876                         } else {
877                                 gmap_remove(kvm->arch.gmap);
878                                 new->private = kvm;
879                                 kvm->arch.gmap = new;
880                                 ret = 0;
881                         }
882                 }
883                 mutex_unlock(&kvm->lock);
884                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
885                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
886                          (void *) kvm->arch.gmap->asce);
887                 break;
888         }
889         default:
890                 ret = -ENXIO;
891                 break;
892         }
893         return ret;
894 }
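/*
 * Editor's note (not part of the original source): note the mutual exclusion
 * enforced here and in KVM_CAP_S390_HPAGE_1M above: CMMA cannot be enabled
 * once 1m huge page backing is allowed, and vice versa, apparently because
 * the CMMA bookkeeping relies on per-4k PGSTEs.
 */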
895
896 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
897
898 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
899 {
900         struct kvm_vcpu *vcpu;
901         int i;
902
903         kvm_s390_vcpu_block_all(kvm);
904
905         kvm_for_each_vcpu(i, vcpu, kvm) {
906                 kvm_s390_vcpu_crypto_setup(vcpu);
907                 /* recreate the shadow crycb by leaving the VSIE handler */
908                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
909         }
910
911         kvm_s390_vcpu_unblock_all(kvm);
912 }
913
914 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916         mutex_lock(&kvm->lock);
917         switch (attr->attr) {
918         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
919                 if (!test_kvm_facility(kvm, 76)) {
920                         mutex_unlock(&kvm->lock);
921                         return -EINVAL;
922                 }
923                 get_random_bytes(
924                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
925                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
926                 kvm->arch.crypto.aes_kw = 1;
927                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
928                 break;
929         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
930                 if (!test_kvm_facility(kvm, 76)) {
931                         mutex_unlock(&kvm->lock);
932                         return -EINVAL;
933                 }
934                 get_random_bytes(
935                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
936                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
937                 kvm->arch.crypto.dea_kw = 1;
938                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
939                 break;
940         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
941                 if (!test_kvm_facility(kvm, 76)) {
942                         mutex_unlock(&kvm->lock);
943                         return -EINVAL;
944                 }
945                 kvm->arch.crypto.aes_kw = 0;
946                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
947                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
948                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
949                 break;
950         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
951                 if (!test_kvm_facility(kvm, 76)) {
952                         mutex_unlock(&kvm->lock);
953                         return -EINVAL;
954                 }
955                 kvm->arch.crypto.dea_kw = 0;
956                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
957                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
958                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
959                 break;
960         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
961                 if (!ap_instructions_available()) {
962                         mutex_unlock(&kvm->lock);
963                         return -EOPNOTSUPP;
964                 }
965                 kvm->arch.crypto.apie = 1;
966                 break;
967         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
968                 if (!ap_instructions_available()) {
969                         mutex_unlock(&kvm->lock);
970                         return -EOPNOTSUPP;
971                 }
972                 kvm->arch.crypto.apie = 0;
973                 break;
974         default:
975                 mutex_unlock(&kvm->lock);
976                 return -ENXIO;
977         }
978
979         kvm_s390_vcpu_crypto_reset_all(kvm);
980         mutex_unlock(&kvm->lock);
981         return 0;
982 }
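/*
 * Editor's note (illustrative userspace sketch, not part of the original
 * source): these crypto controls are driven through the VM device-attribute
 * interface, e.g.
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CRYPTO,
 *		.attr  = KVM_S390_VM_CRYPTO_ENABLE_AES_KW,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * After each change the VCPUs are blocked and their crypto control blocks
 * are rebuilt by kvm_s390_vcpu_crypto_reset_all().
 */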
983
984 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
985 {
986         int cx;
987         struct kvm_vcpu *vcpu;
988
989         kvm_for_each_vcpu(cx, vcpu, kvm)
990                 kvm_s390_sync_request(req, vcpu);
991 }
992
993 /*
994  * Must be called with kvm->srcu held to avoid races on memslots, and with
995  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
996  */
997 static int kvm_s390_vm_start_migration(struct kvm *kvm)
998 {
999         struct kvm_memory_slot *ms;
1000         struct kvm_memslots *slots;
1001         unsigned long ram_pages = 0;
1002         int slotnr;
1003
1004         /* migration mode already enabled */
1005         if (kvm->arch.migration_mode)
1006                 return 0;
1007         slots = kvm_memslots(kvm);
1008         if (!slots || !slots->used_slots)
1009                 return -EINVAL;
1010
1011         if (!kvm->arch.use_cmma) {
1012                 kvm->arch.migration_mode = 1;
1013                 return 0;
1014         }
1015         /* mark all the pages in active slots as dirty */
1016         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1017                 ms = slots->memslots + slotnr;
1018                 if (!ms->dirty_bitmap)
1019                         return -EINVAL;
1020                 /*
1021                  * The second half of the bitmap is only used on x86,
1022                  * and would be wasted otherwise, so we put it to good
1023                  * use here to keep track of the state of the storage
1024                  * attributes.
1025                  */
1026                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1027                 ram_pages += ms->npages;
1028         }
1029         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1030         kvm->arch.migration_mode = 1;
1031         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1032         return 0;
1033 }
1034
1035 /*
1036  * Must be called with kvm->slots_lock to avoid races with ourselves and
1037  * kvm_s390_vm_start_migration.
1038  */
1039 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1040 {
1041         /* migration mode already disabled */
1042         if (!kvm->arch.migration_mode)
1043                 return 0;
1044         kvm->arch.migration_mode = 0;
1045         if (kvm->arch.use_cmma)
1046                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1047         return 0;
1048 }
1049
1050 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1051                                      struct kvm_device_attr *attr)
1052 {
1053         int res = -ENXIO;
1054
1055         mutex_lock(&kvm->slots_lock);
1056         switch (attr->attr) {
1057         case KVM_S390_VM_MIGRATION_START:
1058                 res = kvm_s390_vm_start_migration(kvm);
1059                 break;
1060         case KVM_S390_VM_MIGRATION_STOP:
1061                 res = kvm_s390_vm_stop_migration(kvm);
1062                 break;
1063         default:
1064                 break;
1065         }
1066         mutex_unlock(&kvm->slots_lock);
1067
1068         return res;
1069 }
1070
1071 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1072                                      struct kvm_device_attr *attr)
1073 {
1074         u64 mig = kvm->arch.migration_mode;
1075
1076         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1077                 return -ENXIO;
1078
1079         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1080                 return -EFAULT;
1081         return 0;
1082 }
1083
1084 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1085 {
1086         struct kvm_s390_vm_tod_clock gtod;
1087
1088         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1089                 return -EFAULT;
1090
1091         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1092                 return -EINVAL;
1093         kvm_s390_set_tod_clock(kvm, &gtod);
1094
1095         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1096                 gtod.epoch_idx, gtod.tod);
1097
1098         return 0;
1099 }
1100
1101 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1102 {
1103         u8 gtod_high;
1104
1105         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1106                                            sizeof(gtod_high)))
1107                 return -EFAULT;
1108
1109         if (gtod_high != 0)
1110                 return -EINVAL;
1111         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1112
1113         return 0;
1114 }
1115
1116 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1117 {
1118         struct kvm_s390_vm_tod_clock gtod = { 0 };
1119
1120         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1121                            sizeof(gtod.tod)))
1122                 return -EFAULT;
1123
1124         kvm_s390_set_tod_clock(kvm, &gtod);
1125         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1126         return 0;
1127 }
1128
1129 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1130 {
1131         int ret;
1132
1133         if (attr->flags)
1134                 return -EINVAL;
1135
1136         switch (attr->attr) {
1137         case KVM_S390_VM_TOD_EXT:
1138                 ret = kvm_s390_set_tod_ext(kvm, attr);
1139                 break;
1140         case KVM_S390_VM_TOD_HIGH:
1141                 ret = kvm_s390_set_tod_high(kvm, attr);
1142                 break;
1143         case KVM_S390_VM_TOD_LOW:
1144                 ret = kvm_s390_set_tod_low(kvm, attr);
1145                 break;
1146         default:
1147                 ret = -ENXIO;
1148                 break;
1149         }
1150         return ret;
1151 }
1152
1153 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1154                                    struct kvm_s390_vm_tod_clock *gtod)
1155 {
1156         struct kvm_s390_tod_clock_ext htod;
1157
1158         preempt_disable();
1159
1160         get_tod_clock_ext((char *)&htod);
1161
1162         gtod->tod = htod.tod + kvm->arch.epoch;
1163         gtod->epoch_idx = 0;
1164         if (test_kvm_facility(kvm, 139)) {
1165                 gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
1166                 if (gtod->tod < htod.tod)
1167                         gtod->epoch_idx += 1;
1168         }
1169
1170         preempt_enable();
1171 }
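/*
 * Editor's note (not part of the original source): the guest TOD is the host
 * TOD plus the per-VM epoch, computed modulo 2^64; when the multiple-epoch
 * facility (139) is in use, the epoch index supplies the high part and the
 * "gtod->tod < htod.tod" test adds the carry of that 64-bit addition,
 * roughly the mirror image of the adjustment done in kvm_clock_sync_scb().
 */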
1172
1173 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1174 {
1175         struct kvm_s390_vm_tod_clock gtod;
1176
1177         memset(&gtod, 0, sizeof(gtod));
1178         kvm_s390_get_tod_clock(kvm, &gtod);
1179         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1180                 return -EFAULT;
1181
1182         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1183                 gtod.epoch_idx, gtod.tod);
1184         return 0;
1185 }
1186
1187 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1188 {
1189         u8 gtod_high = 0;
1190
1191         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1192                                          sizeof(gtod_high)))
1193                 return -EFAULT;
1194         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1195
1196         return 0;
1197 }
1198
1199 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1200 {
1201         u64 gtod;
1202
1203         gtod = kvm_s390_get_tod_clock_fast(kvm);
1204         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1205                 return -EFAULT;
1206         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1207
1208         return 0;
1209 }
1210
1211 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1212 {
1213         int ret;
1214
1215         if (attr->flags)
1216                 return -EINVAL;
1217
1218         switch (attr->attr) {
1219         case KVM_S390_VM_TOD_EXT:
1220                 ret = kvm_s390_get_tod_ext(kvm, attr);
1221                 break;
1222         case KVM_S390_VM_TOD_HIGH:
1223                 ret = kvm_s390_get_tod_high(kvm, attr);
1224                 break;
1225         case KVM_S390_VM_TOD_LOW:
1226                 ret = kvm_s390_get_tod_low(kvm, attr);
1227                 break;
1228         default:
1229                 ret = -ENXIO;
1230                 break;
1231         }
1232         return ret;
1233 }
1234
1235 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1236 {
1237         struct kvm_s390_vm_cpu_processor *proc;
1238         u16 lowest_ibc, unblocked_ibc;
1239         int ret = 0;
1240
1241         mutex_lock(&kvm->lock);
1242         if (kvm->created_vcpus) {
1243                 ret = -EBUSY;
1244                 goto out;
1245         }
1246         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1247         if (!proc) {
1248                 ret = -ENOMEM;
1249                 goto out;
1250         }
1251         if (!copy_from_user(proc, (void __user *)attr->addr,
1252                             sizeof(*proc))) {
1253                 kvm->arch.model.cpuid = proc->cpuid;
1254                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1255                 unblocked_ibc = sclp.ibc & 0xfff;
1256                 if (lowest_ibc && proc->ibc) {
1257                         if (proc->ibc > unblocked_ibc)
1258                                 kvm->arch.model.ibc = unblocked_ibc;
1259                         else if (proc->ibc < lowest_ibc)
1260                                 kvm->arch.model.ibc = lowest_ibc;
1261                         else
1262                                 kvm->arch.model.ibc = proc->ibc;
1263                 }
1264                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1265                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1266                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1267                          kvm->arch.model.ibc,
1268                          kvm->arch.model.cpuid);
1269                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1270                          kvm->arch.model.fac_list[0],
1271                          kvm->arch.model.fac_list[1],
1272                          kvm->arch.model.fac_list[2]);
1273         } else
1274                 ret = -EFAULT;
1275         kfree(proc);
1276 out:
1277         mutex_unlock(&kvm->lock);
1278         return ret;
1279 }
1280
1281 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1282                                        struct kvm_device_attr *attr)
1283 {
1284         struct kvm_s390_vm_cpu_feat data;
1285
1286         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1287                 return -EFAULT;
1288         if (!bitmap_subset((unsigned long *) data.feat,
1289                            kvm_s390_available_cpu_feat,
1290                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1291                 return -EINVAL;
1292
1293         mutex_lock(&kvm->lock);
1294         if (kvm->created_vcpus) {
1295                 mutex_unlock(&kvm->lock);
1296                 return -EBUSY;
1297         }
1298         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1299                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1300         mutex_unlock(&kvm->lock);
1301         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1302                          data.feat[0],
1303                          data.feat[1],
1304                          data.feat[2]);
1305         return 0;
1306 }
1307
1308 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1309                                           struct kvm_device_attr *attr)
1310 {
1311         mutex_lock(&kvm->lock);
1312         if (kvm->created_vcpus) {
1313                 mutex_unlock(&kvm->lock);
1314                 return -EBUSY;
1315         }
1316
1317         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1318                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1319                 mutex_unlock(&kvm->lock);
1320                 return -EFAULT;
1321         }
1322         mutex_unlock(&kvm->lock);
1323
1324         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1325                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1326                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1327                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1328                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1329         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1330                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1331                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1332         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1333                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1334                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1335         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1336                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1337                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1338         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1339                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1340                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1341         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1344         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1345                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1347         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1348                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1350         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1351                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1353         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1354                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1356         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1357                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1359         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1360                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1362         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1365         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1368         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1369                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1371         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1372                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1375                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1376         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1378                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1381
1382         return 0;
1383 }
1384
1385 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1386 {
1387         int ret = -ENXIO;
1388
1389         switch (attr->attr) {
1390         case KVM_S390_VM_CPU_PROCESSOR:
1391                 ret = kvm_s390_set_processor(kvm, attr);
1392                 break;
1393         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1394                 ret = kvm_s390_set_processor_feat(kvm, attr);
1395                 break;
1396         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1397                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1398                 break;
1399         }
1400         return ret;
1401 }
1402
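/* Report the configured guest CPU model (cpuid, ibc, facility list) to userspace. */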
1403 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1404 {
1405         struct kvm_s390_vm_cpu_processor *proc;
1406         int ret = 0;
1407
1408         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1409         if (!proc) {
1410                 ret = -ENOMEM;
1411                 goto out;
1412         }
1413         proc->cpuid = kvm->arch.model.cpuid;
1414         proc->ibc = kvm->arch.model.ibc;
1415         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1416                S390_ARCH_FAC_LIST_SIZE_BYTE);
1417         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1418                  kvm->arch.model.ibc,
1419                  kvm->arch.model.cpuid);
1420         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1421                  kvm->arch.model.fac_list[0],
1422                  kvm->arch.model.fac_list[1],
1423                  kvm->arch.model.fac_list[2]);
1424         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1425                 ret = -EFAULT;
1426         kfree(proc);
1427 out:
1428         return ret;
1429 }
1430
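/*
 * Report host machine data to userspace: the real cpuid and SCLP ibc, the
 * facility mask used for this VM and the host facility list (STFLE).
 */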
1431 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1432 {
1433         struct kvm_s390_vm_cpu_machine *mach;
1434         int ret = 0;
1435
1436         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1437         if (!mach) {
1438                 ret = -ENOMEM;
1439                 goto out;
1440         }
1441         get_cpu_id((struct cpuid *) &mach->cpuid);
1442         mach->ibc = sclp.ibc;
1443         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1444                S390_ARCH_FAC_LIST_SIZE_BYTE);
1445         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1446                sizeof(S390_lowcore.stfle_fac_list));
1447         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1448                  kvm->arch.model.ibc,
1449                  kvm->arch.model.cpuid);
1450         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1451                  mach->fac_mask[0],
1452                  mach->fac_mask[1],
1453                  mach->fac_mask[2]);
1454         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1455                  mach->fac_list[0],
1456                  mach->fac_list[1],
1457                  mach->fac_list[2]);
1458         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1459                 ret = -EFAULT;
1460         kfree(mach);
1461 out:
1462         return ret;
1463 }
1464
1465 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1466                                        struct kvm_device_attr *attr)
1467 {
1468         struct kvm_s390_vm_cpu_feat data;
1469
1470         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1471                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1472         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1473                 return -EFAULT;
1474         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1475                          data.feat[0],
1476                          data.feat[1],
1477                          data.feat[2]);
1478         return 0;
1479 }
1480
1481 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1482                                      struct kvm_device_attr *attr)
1483 {
1484         struct kvm_s390_vm_cpu_feat data;
1485
1486         bitmap_copy((unsigned long *) data.feat,
1487                     kvm_s390_available_cpu_feat,
1488                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1489         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1490                 return -EFAULT;
1491         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1492                          data.feat[0],
1493                          data.feat[1],
1494                          data.feat[2]);
1495         return 0;
1496 }
1497
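/* Report the guest's instruction subfunction (query) masks to userspace. */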
1498 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1499                                           struct kvm_device_attr *attr)
1500 {
1501         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1502             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1503                 return -EFAULT;
1504
1505         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1506                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1507                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1508                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1509                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1510         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1511                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1512                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1513         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1514                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1515                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1516         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1517                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1518                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1519         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1520                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1521                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1522         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1525         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1526                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1528         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1529                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1531         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1532                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1534         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1535                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1537         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1538                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1540         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1541                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1543         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1546         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1549         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1550                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1552         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1553                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1556                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1557         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1559                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1562
1563         return 0;
1564 }
1565
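/* Report the instruction subfunction masks available on the host to userspace. */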
1566 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1567                                         struct kvm_device_attr *attr)
1568 {
1569         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1570             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1571                 return -EFAULT;
1572
1573         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1575                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1576                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1577                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1578         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1579                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1580                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1581         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1582                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1583                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1584         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1585                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1586                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1587         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1588                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1589                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1590         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1591                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1592                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1593         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1594                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1595                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1596         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1597                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1598                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1599         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1600                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1602         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1603                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1604                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1605         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1606                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1607                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1608         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1609                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1610                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1611         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1612                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1614         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1615                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1617         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1618                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1620         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1621                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1622                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1624                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1625         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1626                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1627                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1628                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1630
1631         return 0;
1632 }
1633
1634 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1635 {
1636         int ret = -ENXIO;
1637
1638         switch (attr->attr) {
1639         case KVM_S390_VM_CPU_PROCESSOR:
1640                 ret = kvm_s390_get_processor(kvm, attr);
1641                 break;
1642         case KVM_S390_VM_CPU_MACHINE:
1643                 ret = kvm_s390_get_machine(kvm, attr);
1644                 break;
1645         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1646                 ret = kvm_s390_get_processor_feat(kvm, attr);
1647                 break;
1648         case KVM_S390_VM_CPU_MACHINE_FEAT:
1649                 ret = kvm_s390_get_machine_feat(kvm, attr);
1650                 break;
1651         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1652                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1653                 break;
1654         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1655                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1656                 break;
1657         }
1658         return ret;
1659 }
1660
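/*
 * Illustrative userspace sketch (not part of this file): the set/get/has
 * handlers below are reached through the device-attr ioctls on the VM fd.
 * Constant and field names are taken from the handlers in this file; the
 * exact call sequence is an assumption, e.g.:
 *
 *	struct kvm_s390_vm_cpu_processor proc = { .ibc = ..., .fac_list = ... };
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr  = KVM_S390_VM_CPU_PROCESSOR,
 *		.addr  = (__u64)(unsigned long)&proc,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * The CPU-model SET handlers check kvm->created_vcpus and return -EBUSY once
 * VCPUs exist (see the subfunc handler above), so this is done before the
 * first KVM_CREATE_VCPU.
 */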
1661 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1662 {
1663         int ret;
1664
1665         switch (attr->group) {
1666         case KVM_S390_VM_MEM_CTRL:
1667                 ret = kvm_s390_set_mem_control(kvm, attr);
1668                 break;
1669         case KVM_S390_VM_TOD:
1670                 ret = kvm_s390_set_tod(kvm, attr);
1671                 break;
1672         case KVM_S390_VM_CPU_MODEL:
1673                 ret = kvm_s390_set_cpu_model(kvm, attr);
1674                 break;
1675         case KVM_S390_VM_CRYPTO:
1676                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1677                 break;
1678         case KVM_S390_VM_MIGRATION:
1679                 ret = kvm_s390_vm_set_migration(kvm, attr);
1680                 break;
1681         default:
1682                 ret = -ENXIO;
1683                 break;
1684         }
1685
1686         return ret;
1687 }
1688
1689 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1690 {
1691         int ret;
1692
1693         switch (attr->group) {
1694         case KVM_S390_VM_MEM_CTRL:
1695                 ret = kvm_s390_get_mem_control(kvm, attr);
1696                 break;
1697         case KVM_S390_VM_TOD:
1698                 ret = kvm_s390_get_tod(kvm, attr);
1699                 break;
1700         case KVM_S390_VM_CPU_MODEL:
1701                 ret = kvm_s390_get_cpu_model(kvm, attr);
1702                 break;
1703         case KVM_S390_VM_MIGRATION:
1704                 ret = kvm_s390_vm_get_migration(kvm, attr);
1705                 break;
1706         default:
1707                 ret = -ENXIO;
1708                 break;
1709         }
1710
1711         return ret;
1712 }
1713
1714 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1715 {
1716         int ret;
1717
1718         switch (attr->group) {
1719         case KVM_S390_VM_MEM_CTRL:
1720                 switch (attr->attr) {
1721                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1722                 case KVM_S390_VM_MEM_CLR_CMMA:
1723                         ret = sclp.has_cmma ? 0 : -ENXIO;
1724                         break;
1725                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1726                         ret = 0;
1727                         break;
1728                 default:
1729                         ret = -ENXIO;
1730                         break;
1731                 }
1732                 break;
1733         case KVM_S390_VM_TOD:
1734                 switch (attr->attr) {
1735                 case KVM_S390_VM_TOD_LOW:
1736                 case KVM_S390_VM_TOD_HIGH:
1737                         ret = 0;
1738                         break;
1739                 default:
1740                         ret = -ENXIO;
1741                         break;
1742                 }
1743                 break;
1744         case KVM_S390_VM_CPU_MODEL:
1745                 switch (attr->attr) {
1746                 case KVM_S390_VM_CPU_PROCESSOR:
1747                 case KVM_S390_VM_CPU_MACHINE:
1748                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1749                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1750                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1751                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1752                         ret = 0;
1753                         break;
1754                 default:
1755                         ret = -ENXIO;
1756                         break;
1757                 }
1758                 break;
1759         case KVM_S390_VM_CRYPTO:
1760                 switch (attr->attr) {
1761                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1762                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1763                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1764                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1765                         ret = 0;
1766                         break;
1767                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1768                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1769                         ret = ap_instructions_available() ? 0 : -ENXIO;
1770                         break;
1771                 default:
1772                         ret = -ENXIO;
1773                         break;
1774                 }
1775                 break;
1776         case KVM_S390_VM_MIGRATION:
1777                 ret = 0;
1778                 break;
1779         default:
1780                 ret = -ENXIO;
1781                 break;
1782         }
1783
1784         return ret;
1785 }
1786
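/*
 * Read the storage keys of args->count guest frames starting at
 * args->start_gfn and copy them to the userspace buffer at skeydata_addr.
 */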
1787 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1788 {
1789         uint8_t *keys;
1790         uint64_t hva;
1791         int srcu_idx, i, r = 0;
1792
1793         if (args->flags != 0)
1794                 return -EINVAL;
1795
1796         /* Is this guest using storage keys? */
1797         if (!mm_uses_skeys(current->mm))
1798                 return KVM_S390_GET_SKEYS_NONE;
1799
1800         /* Enforce sane limit on memory allocation */
1801         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1802                 return -EINVAL;
1803
1804         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1805         if (!keys)
1806                 return -ENOMEM;
1807
1808         down_read(&current->mm->mmap_sem);
1809         srcu_idx = srcu_read_lock(&kvm->srcu);
1810         for (i = 0; i < args->count; i++) {
1811                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1812                 if (kvm_is_error_hva(hva)) {
1813                         r = -EFAULT;
1814                         break;
1815                 }
1816
1817                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1818                 if (r)
1819                         break;
1820         }
1821         srcu_read_unlock(&kvm->srcu, srcu_idx);
1822         up_read(&current->mm->mmap_sem);
1823
1824         if (!r) {
1825                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1826                                  sizeof(uint8_t) * args->count);
1827                 if (r)
1828                         r = -EFAULT;
1829         }
1830
1831         kvfree(keys);
1832         return r;
1833 }
1834
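/*
 * Set the storage keys of args->count guest frames starting at
 * args->start_gfn from the userspace buffer at skeydata_addr. Storage key
 * handling is enabled for the guest first; faults are fixed up and retried.
 */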
1835 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1836 {
1837         uint8_t *keys;
1838         uint64_t hva;
1839         int srcu_idx, i, r = 0;
1840         bool unlocked;
1841
1842         if (args->flags != 0)
1843                 return -EINVAL;
1844
1845         /* Enforce sane limit on memory allocation */
1846         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1847                 return -EINVAL;
1848
1849         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1850         if (!keys)
1851                 return -ENOMEM;
1852
1853         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1854                            sizeof(uint8_t) * args->count);
1855         if (r) {
1856                 r = -EFAULT;
1857                 goto out;
1858         }
1859
1860         /* Enable storage key handling for the guest */
1861         r = s390_enable_skey();
1862         if (r)
1863                 goto out;
1864
1865         i = 0;
1866         down_read(&current->mm->mmap_sem);
1867         srcu_idx = srcu_read_lock(&kvm->srcu);
1868         while (i < args->count) {
1869                 unlocked = false;
1870                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1871                 if (kvm_is_error_hva(hva)) {
1872                         r = -EFAULT;
1873                         break;
1874                 }
1875
1876                 /* Lowest order bit is reserved */
1877                 if (keys[i] & 0x01) {
1878                         r = -EINVAL;
1879                         break;
1880                 }
1881
1882                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1883                 if (r) {
1884                         r = fixup_user_fault(current, current->mm, hva,
1885                                              FAULT_FLAG_WRITE, &unlocked);
1886                         if (r)
1887                                 break;
1888                 }
1889                 if (!r)
1890                         i++;
1891         }
1892         srcu_read_unlock(&kvm->srcu, srcu_idx);
1893         up_read(&current->mm->mmap_sem);
1894 out:
1895         kvfree(keys);
1896         return r;
1897 }
1898
1899 /*
1900  * Base address and length must be sent at the start of each block, therefore
1901  * it's cheaper to send some clean data, as long as it's less than the size of
1902  * two longs.
1903  */
1904 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1905 /* use the same limit as for storage keys, for consistency */
1906 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1907
1908 /*
1909  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1910  * address falls in a hole. In that case the index of one of the memslots
1911  * bordering the hole is returned.
1912  */
1913 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1914 {
1915         int start = 0, end = slots->used_slots;
1916         int slot = atomic_read(&slots->lru_slot);
1917         struct kvm_memory_slot *memslots = slots->memslots;
1918
1919         if (gfn >= memslots[slot].base_gfn &&
1920             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1921                 return slot;
1922
1923         while (start < end) {
1924                 slot = start + (end - start) / 2;
1925
1926                 if (gfn >= memslots[slot].base_gfn)
1927                         end = slot;
1928                 else
1929                         start = slot + 1;
1930         }
1931
1932         if (gfn >= memslots[start].base_gfn &&
1933             gfn < memslots[start].base_gfn + memslots[start].npages) {
1934                 atomic_set(&slots->lru_slot, start);
1935         }
1936
1937         return start;
1938 }
1939
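/*
 * Peek mode: read the CMMA values of consecutive guest frames starting at
 * args->start_gfn without touching the dirty bitmap or the dirty page count.
 */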
1940 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1941                               u8 *res, unsigned long bufsize)
1942 {
1943         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1944
1945         args->count = 0;
1946         while (args->count < bufsize) {
1947                 hva = gfn_to_hva(kvm, cur_gfn);
1948                 /*
1949                  * We return an error if the first value was invalid, but we
1950                  * return successfully if at least one value was copied.
1951                  */
1952                 if (kvm_is_error_hva(hva))
1953                         return args->count ? 0 : -EFAULT;
1954                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1955                         pgstev = 0;
1956                 res[args->count++] = (pgstev >> 24) & 0x43;
1957                 cur_gfn++;
1958         }
1959
1960         return 0;
1961 }
1962
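/*
 * Find the guest frame number of the next bit set in the CMMA dirty bitmap
 * at or after cur_gfn, searching across memslots and wrapping around at the
 * end of memory.
 */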
1963 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1964                                               unsigned long cur_gfn)
1965 {
1966         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1967         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1968         unsigned long ofs = cur_gfn - ms->base_gfn;
1969
1970         if (ms->base_gfn + ms->npages <= cur_gfn) {
1971                 slotidx--;
1972                 /* If we are above the highest slot, wrap around */
1973                 if (slotidx < 0)
1974                         slotidx = slots->used_slots - 1;
1975
1976                 ms = slots->memslots + slotidx;
1977                 ofs = 0;
1978         }
1979         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1980         while ((slotidx > 0) && (ofs >= ms->npages)) {
1981                 slotidx--;
1982                 ms = slots->memslots + slotidx;
1983                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
1984         }
1985         return ms->base_gfn + ofs;
1986 }
1987
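/*
 * Migration mode: collect CMMA values starting at the next dirty frame,
 * clearing the dirty bits and decrementing the dirty page counter as we go.
 * Stops at the end of memory, the end of the buffer, or a long clean stretch.
 */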
1988 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1989                              u8 *res, unsigned long bufsize)
1990 {
1991         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
1992         struct kvm_memslots *slots = kvm_memslots(kvm);
1993         struct kvm_memory_slot *ms;
1994
1995         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
1996         ms = gfn_to_memslot(kvm, cur_gfn);
1997         args->count = 0;
1998         args->start_gfn = cur_gfn;
1999         if (!ms)
2000                 return 0;
2001         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2002         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2003
2004         while (args->count < bufsize) {
2005                 hva = gfn_to_hva(kvm, cur_gfn);
2006                 if (kvm_is_error_hva(hva))
2007                         return 0;
2008                 /* Decrement only if we actually flipped the bit to 0 */
2009                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2010                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2011                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2012                         pgstev = 0;
2013                 /* Save the value */
2014                 res[args->count++] = (pgstev >> 24) & 0x43;
2015                 /* If the next bit is too far away, stop. */
2016                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2017                         return 0;
2018                 /* If we reached the previous "next", find the next one */
2019                 if (cur_gfn == next_gfn)
2020                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2021                 /* Reached the end of memory or of the buffer, stop */
2022                 if ((next_gfn >= mem_end) ||
2023                     (next_gfn - args->start_gfn >= bufsize))
2024                         return 0;
2025                 cur_gfn++;
2026                 /* Reached the end of the current memslot, take the next one. */
2027                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2028                         ms = gfn_to_memslot(kvm, cur_gfn);
2029                         if (!ms)
2030                                 return 0;
2031                 }
2032         }
2033         return 0;
2034 }
2035
2036 /*
2037  * This function searches for the next page with dirty CMMA attributes, and
2038  * saves the attributes in the buffer up to either the end of the buffer or
2039  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2040  * no trailing clean bytes are saved.
2041  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2042  * output buffer will indicate 0 as length.
2043  */
2044 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2045                                   struct kvm_s390_cmma_log *args)
2046 {
2047         unsigned long bufsize;
2048         int srcu_idx, peek, ret;
2049         u8 *values;
2050
2051         if (!kvm->arch.use_cmma)
2052                 return -ENXIO;
2053         /* Invalid/unsupported flags were specified */
2054         if (args->flags & ~KVM_S390_CMMA_PEEK)
2055                 return -EINVAL;
2056         /* Migration mode query, and we are not doing a migration */
2057         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2058         if (!peek && !kvm->arch.migration_mode)
2059                 return -EINVAL;
2060         /* CMMA is disabled or was not used, or the buffer has length zero */
2061         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2062         if (!bufsize || !kvm->mm->context.uses_cmm) {
2063                 memset(args, 0, sizeof(*args));
2064                 return 0;
2065         }
2066         /* We are not peeking, and there are no dirty pages */
2067         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2068                 memset(args, 0, sizeof(*args));
2069                 return 0;
2070         }
2071
2072         values = vmalloc(bufsize);
2073         if (!values)
2074                 return -ENOMEM;
2075
2076         down_read(&kvm->mm->mmap_sem);
2077         srcu_idx = srcu_read_lock(&kvm->srcu);
2078         if (peek)
2079                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2080         else
2081                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2082         srcu_read_unlock(&kvm->srcu, srcu_idx);
2083         up_read(&kvm->mm->mmap_sem);
2084
2085         if (kvm->arch.migration_mode)
2086                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2087         else
2088                 args->remaining = 0;
2089
2090         if (copy_to_user((void __user *)args->values, values, args->count))
2091                 ret = -EFAULT;
2092
2093         vfree(values);
2094         return ret;
2095 }
2096
2097 /*
2098  * This function sets the CMMA attributes for the given pages. If the input
2099  * buffer has zero length, no action is taken, otherwise the attributes are
2100  * set and the mm->context.uses_cmm flag is set.
2101  */
2102 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2103                                   const struct kvm_s390_cmma_log *args)
2104 {
2105         unsigned long hva, mask, pgstev, i;
2106         uint8_t *bits;
2107         int srcu_idx, r = 0;
2108
2109         mask = args->mask;
2110
2111         if (!kvm->arch.use_cmma)
2112                 return -ENXIO;
2113         /* invalid/unsupported flags */
2114         if (args->flags != 0)
2115                 return -EINVAL;
2116         /* Enforce sane limit on memory allocation */
2117         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2118                 return -EINVAL;
2119         /* Nothing to do */
2120         if (args->count == 0)
2121                 return 0;
2122
2123         bits = vmalloc(array_size(sizeof(*bits), args->count));
2124         if (!bits)
2125                 return -ENOMEM;
2126
2127         r = copy_from_user(bits, (void __user *)args->values, args->count);
2128         if (r) {
2129                 r = -EFAULT;
2130                 goto out;
2131         }
2132
2133         down_read(&kvm->mm->mmap_sem);
2134         srcu_idx = srcu_read_lock(&kvm->srcu);
2135         for (i = 0; i < args->count; i++) {
2136                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2137                 if (kvm_is_error_hva(hva)) {
2138                         r = -EFAULT;
2139                         break;
2140                 }
2141
2142                 pgstev = bits[i];
2143                 pgstev = pgstev << 24;
2144                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2145                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2146         }
2147         srcu_read_unlock(&kvm->srcu, srcu_idx);
2148         up_read(&kvm->mm->mmap_sem);
2149
2150         if (!kvm->mm->context.uses_cmm) {
2151                 down_write(&kvm->mm->mmap_sem);
2152                 kvm->mm->context.uses_cmm = 1;
2153                 up_write(&kvm->mm->mmap_sem);
2154         }
2155 out:
2156         vfree(bits);
2157         return r;
2158 }
2159
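/*
 * Illustrative userspace sketch (not part of this file): the CMMA dirty log
 * is consumed through KVM_S390_GET_CMMA_BITS on the VM fd, either with
 * KVM_S390_CMMA_PEEK set or, during migration, with migration mode enabled
 * via the KVM_S390_VM_MIGRATION attribute. Names come from the handlers in
 * this file; the loop itself is a simplified assumption, e.g.:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count     = buflen,
 *		.flags     = 0,
 *		.values    = (__u64)(unsigned long)buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		(consume log.count values for frames starting at log.start_gfn)
 *		log.start_gfn += log.count;
 *		log.count = buflen;
 *	} while (log.remaining);
 */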
2160 long kvm_arch_vm_ioctl(struct file *filp,
2161                        unsigned int ioctl, unsigned long arg)
2162 {
2163         struct kvm *kvm = filp->private_data;
2164         void __user *argp = (void __user *)arg;
2165         struct kvm_device_attr attr;
2166         int r;
2167
2168         switch (ioctl) {
2169         case KVM_S390_INTERRUPT: {
2170                 struct kvm_s390_interrupt s390int;
2171
2172                 r = -EFAULT;
2173                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2174                         break;
2175                 r = kvm_s390_inject_vm(kvm, &s390int);
2176                 break;
2177         }
2178         case KVM_CREATE_IRQCHIP: {
2179                 struct kvm_irq_routing_entry routing;
2180
2181                 r = -EINVAL;
2182                 if (kvm->arch.use_irqchip) {
2183                         /* Set up dummy routing. */
2184                         memset(&routing, 0, sizeof(routing));
2185                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2186                 }
2187                 break;
2188         }
2189         case KVM_SET_DEVICE_ATTR: {
2190                 r = -EFAULT;
2191                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2192                         break;
2193                 r = kvm_s390_vm_set_attr(kvm, &attr);
2194                 break;
2195         }
2196         case KVM_GET_DEVICE_ATTR: {
2197                 r = -EFAULT;
2198                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2199                         break;
2200                 r = kvm_s390_vm_get_attr(kvm, &attr);
2201                 break;
2202         }
2203         case KVM_HAS_DEVICE_ATTR: {
2204                 r = -EFAULT;
2205                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2206                         break;
2207                 r = kvm_s390_vm_has_attr(kvm, &attr);
2208                 break;
2209         }
2210         case KVM_S390_GET_SKEYS: {
2211                 struct kvm_s390_skeys args;
2212
2213                 r = -EFAULT;
2214                 if (copy_from_user(&args, argp,
2215                                    sizeof(struct kvm_s390_skeys)))
2216                         break;
2217                 r = kvm_s390_get_skeys(kvm, &args);
2218                 break;
2219         }
2220         case KVM_S390_SET_SKEYS: {
2221                 struct kvm_s390_skeys args;
2222
2223                 r = -EFAULT;
2224                 if (copy_from_user(&args, argp,
2225                                    sizeof(struct kvm_s390_skeys)))
2226                         break;
2227                 r = kvm_s390_set_skeys(kvm, &args);
2228                 break;
2229         }
2230         case KVM_S390_GET_CMMA_BITS: {
2231                 struct kvm_s390_cmma_log args;
2232
2233                 r = -EFAULT;
2234                 if (copy_from_user(&args, argp, sizeof(args)))
2235                         break;
2236                 mutex_lock(&kvm->slots_lock);
2237                 r = kvm_s390_get_cmma_bits(kvm, &args);
2238                 mutex_unlock(&kvm->slots_lock);
2239                 if (!r) {
2240                         r = copy_to_user(argp, &args, sizeof(args));
2241                         if (r)
2242                                 r = -EFAULT;
2243                 }
2244                 break;
2245         }
2246         case KVM_S390_SET_CMMA_BITS: {
2247                 struct kvm_s390_cmma_log args;
2248
2249                 r = -EFAULT;
2250                 if (copy_from_user(&args, argp, sizeof(args)))
2251                         break;
2252                 mutex_lock(&kvm->slots_lock);
2253                 r = kvm_s390_set_cmma_bits(kvm, &args);
2254                 mutex_unlock(&kvm->slots_lock);
2255                 break;
2256         }
2257         default:
2258                 r = -ENOTTY;
2259         }
2260
2261         return r;
2262 }
2263
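/* Query whether the AP extended addressing (APXA) facility is installed. */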
2264 static int kvm_s390_apxa_installed(void)
2265 {
2266         struct ap_config_info info;
2267
2268         if (ap_instructions_available()) {
2269                 if (ap_qci(&info) == 0)
2270                         return info.apxa;
2271         }
2272
2273         return 0;
2274 }
2275
2276 /*
2277  * The format of the crypto control block (CRYCB) is specified in the 3 low
2278  * order bits of the CRYCB designation (CRYCBD) field as follows:
2279  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2280  *           AP extended addressing (APXA) facility is installed.
2281  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2282  * Format 2: Both the APXA and MSAX3 facilities are installed.
2283  */
2284 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2285 {
2286         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2287
2288         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2289         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2290
2291         /* Check whether MSAX3 is installed */
2292         if (!test_kvm_facility(kvm, 76))
2293                 return;
2294
2295         if (kvm_s390_apxa_installed())
2296                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2297         else
2298                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2299 }
2300
2301 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2302                                unsigned long *aqm, unsigned long *adm)
2303 {
2304         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2305
2306         mutex_lock(&kvm->lock);
2307         kvm_s390_vcpu_block_all(kvm);
2308
2309         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2310         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2311                 memcpy(crycb->apcb1.apm, apm, 32);
2312                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2313                          apm[0], apm[1], apm[2], apm[3]);
2314                 memcpy(crycb->apcb1.aqm, aqm, 32);
2315                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2316                          aqm[0], aqm[1], aqm[2], aqm[3]);
2317                 memcpy(crycb->apcb1.adm, adm, 32);
2318                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2319                          adm[0], adm[1], adm[2], adm[3]);
2320                 break;
2321         case CRYCB_FORMAT1:
2322         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2323                 memcpy(crycb->apcb0.apm, apm, 8);
2324                 memcpy(crycb->apcb0.aqm, aqm, 2);
2325                 memcpy(crycb->apcb0.adm, adm, 2);
2326                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2327                          apm[0], *((unsigned short *)aqm),
2328                          *((unsigned short *)adm));
2329                 break;
2330         default:        /* Cannot happen */
2331                 break;
2332         }
2333
2334         /* recreate the shadow crycb for each vcpu */
2335         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2336         kvm_s390_vcpu_unblock_all(kvm);
2337         mutex_unlock(&kvm->lock);
2338 }
2339 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2340
2341 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2342 {
2343         mutex_lock(&kvm->lock);
2344         kvm_s390_vcpu_block_all(kvm);
2345
2346         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2347                sizeof(kvm->arch.crypto.crycb->apcb0));
2348         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2349                sizeof(kvm->arch.crypto.crycb->apcb1));
2350
2351         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2352         /* recreate the shadow crycb for each vcpu */
2353         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2354         kvm_s390_vcpu_unblock_all(kvm);
2355         mutex_unlock(&kvm->lock);
2356 }
2357 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2358
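/* Use the host cpuid, with the version field forced to 0xff to identify a KVM guest. */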
2359 static u64 kvm_s390_get_initial_cpuid(void)
2360 {
2361         struct cpuid cpuid;
2362
2363         get_cpu_id(&cpuid);
2364         cpuid.version = 0xff;
2365         return *((u64 *) &cpuid);
2366 }
2367
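/*
 * Set up the crypto control block. If MSAX3 (facility 76) is available,
 * enable AES/DEA protected key handling and generate random wrapping key masks.
 */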
2368 static void kvm_s390_crypto_init(struct kvm *kvm)
2369 {
2370         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2371         kvm_s390_set_crycb_format(kvm);
2372
2373         if (!test_kvm_facility(kvm, 76))
2374                 return;
2375
2376         /* Enable AES/DEA protected key functions by default */
2377         kvm->arch.crypto.aes_kw = 1;
2378         kvm->arch.crypto.dea_kw = 1;
2379         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2380                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2381         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2382                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2383 }
2384
2385 static void sca_dispose(struct kvm *kvm)
2386 {
2387         if (kvm->arch.use_esca)
2388                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2389         else
2390                 free_page((unsigned long)(kvm->arch.sca));
2391         kvm->arch.sca = NULL;
2392 }
2393
2394 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2395 {
2396         gfp_t alloc_flags = GFP_KERNEL;
2397         int i, rc;
2398         char debug_name[16];
2399         static unsigned long sca_offset;
2400
2401         rc = -EINVAL;
2402 #ifdef CONFIG_KVM_S390_UCONTROL
2403         if (type & ~KVM_VM_S390_UCONTROL)
2404                 goto out_err;
2405         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2406                 goto out_err;
2407 #else
2408         if (type)
2409                 goto out_err;
2410 #endif
2411
2412         rc = s390_enable_sie();
2413         if (rc)
2414                 goto out_err;
2415
2416         rc = -ENOMEM;
2417
2418         if (!sclp.has_64bscao)
2419                 alloc_flags |= GFP_DMA;
2420         rwlock_init(&kvm->arch.sca_lock);
2421         /* start with basic SCA */
2422         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2423         if (!kvm->arch.sca)
2424                 goto out_err;
2425         mutex_lock(&kvm_lock);
2426         sca_offset += 16;
2427         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2428                 sca_offset = 0;
2429         kvm->arch.sca = (struct bsca_block *)
2430                         ((char *) kvm->arch.sca + sca_offset);
2431         mutex_unlock(&kvm_lock);
2432
2433         sprintf(debug_name, "kvm-%u", current->pid);
2434
2435         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2436         if (!kvm->arch.dbf)
2437                 goto out_err;
2438
2439         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2440         kvm->arch.sie_page2 =
2441              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2442         if (!kvm->arch.sie_page2)
2443                 goto out_err;
2444
2445         kvm->arch.sie_page2->kvm = kvm;
2446         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2447
2448         for (i = 0; i < kvm_s390_fac_size(); i++) {
2449                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2450                                               (kvm_s390_fac_base[i] |
2451                                                kvm_s390_fac_ext[i]);
2452                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2453                                               kvm_s390_fac_base[i];
2454         }
2455         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2456
2457         /* we are always in czam mode - even on pre-z14 machines */
2458         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2459         set_kvm_facility(kvm->arch.model.fac_list, 138);
2460         /* we emulate STHYI in kvm */
2461         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2462         set_kvm_facility(kvm->arch.model.fac_list, 74);
2463         if (MACHINE_HAS_TLB_GUEST) {
2464                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2465                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2466         }
2467
2468         if (css_general_characteristics.aiv && test_facility(65))
2469                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2470
2471         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2472         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2473
2474         kvm_s390_crypto_init(kvm);
2475
2476         mutex_init(&kvm->arch.float_int.ais_lock);
2477         spin_lock_init(&kvm->arch.float_int.lock);
2478         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2479                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2480         init_waitqueue_head(&kvm->arch.ipte_wq);
2481         mutex_init(&kvm->arch.ipte_mutex);
2482
2483         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2484         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2485
2486         if (type & KVM_VM_S390_UCONTROL) {
2487                 kvm->arch.gmap = NULL;
2488                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2489         } else {
2490                 if (sclp.hamax == U64_MAX)
2491                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2492                 else
2493                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2494                                                     sclp.hamax + 1);
2495                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2496                 if (!kvm->arch.gmap)
2497                         goto out_err;
2498                 kvm->arch.gmap->private = kvm;
2499                 kvm->arch.gmap->pfault_enabled = 0;
2500         }
2501
2502         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2503         kvm->arch.use_skf = sclp.has_skey;
2504         spin_lock_init(&kvm->arch.start_stop_lock);
2505         kvm_s390_vsie_init(kvm);
2506         kvm_s390_gisa_init(kvm);
2507         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2508
2509         return 0;
2510 out_err:
2511         free_page((unsigned long)kvm->arch.sie_page2);
2512         debug_unregister(kvm->arch.dbf);
2513         sca_dispose(kvm);
2514         KVM_EVENT(3, "creation of vm failed: %d", rc);
2515         return rc;
2516 }
2517
2518 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2519 {
2520         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2521         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2522         kvm_s390_clear_local_irqs(vcpu);
2523         kvm_clear_async_pf_completion_queue(vcpu);
2524         if (!kvm_is_ucontrol(vcpu->kvm))
2525                 sca_del_vcpu(vcpu);
2526
2527         if (kvm_is_ucontrol(vcpu->kvm))
2528                 gmap_remove(vcpu->arch.gmap);
2529
2530         if (vcpu->kvm->arch.use_cmma)
2531                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2532         free_page((unsigned long)(vcpu->arch.sie_block));
2533 }
2534
2535 static void kvm_free_vcpus(struct kvm *kvm)
2536 {
2537         unsigned int i;
2538         struct kvm_vcpu *vcpu;
2539
2540         kvm_for_each_vcpu(i, vcpu, kvm)
2541                 kvm_vcpu_destroy(vcpu);
2542
2543         mutex_lock(&kvm->lock);
2544         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2545                 kvm->vcpus[i] = NULL;
2546
2547         atomic_set(&kvm->online_vcpus, 0);
2548         mutex_unlock(&kvm->lock);
2549 }
2550
2551 void kvm_arch_destroy_vm(struct kvm *kvm)
2552 {
2553         kvm_free_vcpus(kvm);
2554         sca_dispose(kvm);
2555         debug_unregister(kvm->arch.dbf);
2556         kvm_s390_gisa_destroy(kvm);
2557         free_page((unsigned long)kvm->arch.sie_page2);
2558         if (!kvm_is_ucontrol(kvm))
2559                 gmap_remove(kvm->arch.gmap);
2560         kvm_s390_destroy_adapters(kvm);
2561         kvm_s390_clear_float_irqs(kvm);
2562         kvm_s390_vsie_destroy(kvm);
2563         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2564 }
2565
2566 /* Section: vcpu related */
2567 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2568 {
2569         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2570         if (!vcpu->arch.gmap)
2571                 return -ENOMEM;
2572         vcpu->arch.gmap->private = vcpu->kvm;
2573
2574         return 0;
2575 }
2576
2577 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2578 {
2579         if (!kvm_s390_use_sca_entries())
2580                 return;
2581         read_lock(&vcpu->kvm->arch.sca_lock);
2582         if (vcpu->kvm->arch.use_esca) {
2583                 struct esca_block *sca = vcpu->kvm->arch.sca;
2584
2585                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2586                 sca->cpu[vcpu->vcpu_id].sda = 0;
2587         } else {
2588                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2589
2590                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2591                 sca->cpu[vcpu->vcpu_id].sda = 0;
2592         }
2593         read_unlock(&vcpu->kvm->arch.sca_lock);
2594 }
2595
2596 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2597 {
2598         if (!kvm_s390_use_sca_entries()) {
2599                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2600
2601                 /* we still need the basic sca for the ipte control */
2602                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2603                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2604                 return;
2605         }
2606         read_lock(&vcpu->kvm->arch.sca_lock);
2607         if (vcpu->kvm->arch.use_esca) {
2608                 struct esca_block *sca = vcpu->kvm->arch.sca;
2609
2610                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2611                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2612                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2613                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2614                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2615         } else {
2616                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2617
2618                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2619                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2620                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2621                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2622         }
2623         read_unlock(&vcpu->kvm->arch.sca_lock);
2624 }
2625
2626 /* Basic SCA to Extended SCA data copy routines */
2627 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2628 {
2629         d->sda = s->sda;
2630         d->sigp_ctrl.c = s->sigp_ctrl.c;
2631         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2632 }
2633
2634 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2635 {
2636         int i;
2637
2638         d->ipte_control = s->ipte_control;
2639         d->mcn[0] = s->mcn;
2640         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2641                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2642 }
2643
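/*
 * Replace the basic SCA with an extended SCA: copy all entries and repoint
 * every VCPU's SIE block to the new SCA while all VCPUs are blocked.
 */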
2644 static int sca_switch_to_extended(struct kvm *kvm)
2645 {
2646         struct bsca_block *old_sca = kvm->arch.sca;
2647         struct esca_block *new_sca;
2648         struct kvm_vcpu *vcpu;
2649         unsigned int vcpu_idx;
2650         u32 scaol, scaoh;
2651
2652         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2653         if (!new_sca)
2654                 return -ENOMEM;
2655
2656         scaoh = (u32)((u64)(new_sca) >> 32);
2657         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2658
2659         kvm_s390_vcpu_block_all(kvm);
2660         write_lock(&kvm->arch.sca_lock);
2661
2662         sca_copy_b_to_e(new_sca, old_sca);
2663
2664         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2665                 vcpu->arch.sie_block->scaoh = scaoh;
2666                 vcpu->arch.sie_block->scaol = scaol;
2667                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2668         }
2669         kvm->arch.sca = new_sca;
2670         kvm->arch.use_esca = 1;
2671
2672         write_unlock(&kvm->arch.sca_lock);
2673         kvm_s390_vcpu_unblock_all(kvm);
2674
2675         free_page((unsigned long)old_sca);
2676
2677         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2678                  old_sca, kvm->arch.sca);
2679         return 0;
2680 }
2681
2682 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2683 {
2684         int rc;
2685
2686         if (!kvm_s390_use_sca_entries()) {
2687                 if (id < KVM_MAX_VCPUS)
2688                         return true;
2689                 return false;
2690         }
2691         if (id < KVM_S390_BSCA_CPU_SLOTS)
2692                 return true;
2693         if (!sclp.has_esca || !sclp.has_64bscao)
2694                 return false;
2695
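             /* the id does not fit into the basic SCA; switch the VM to the
              * extended SCA, serialized against concurrent switches by
              * kvm->lock
              */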
2696         mutex_lock(&kvm->lock);
2697         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2698         mutex_unlock(&kvm->lock);
2699
2700         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2701 }
2702
2703 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2704 {
2705         return 0;
2706 }
2707
2708 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2709 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2710 {
2711         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2712         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2713         vcpu->arch.cputm_start = get_tod_clock_fast();
2714         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2715 }
2716
2717 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2718 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2719 {
2720         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2721         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2722         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2723         vcpu->arch.cputm_start = 0;
2724         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2725 }
2726
2727 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2728 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2729 {
2730         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2731         vcpu->arch.cputm_enabled = true;
2732         __start_cpu_timer_accounting(vcpu);
2733 }
2734
2735 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2736 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2737 {
2738         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2739         __stop_cpu_timer_accounting(vcpu);
2740         vcpu->arch.cputm_enabled = false;
2741 }
2742
2743 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2744 {
2745         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2746         __enable_cpu_timer_accounting(vcpu);
2747         preempt_enable();
2748 }
2749
2750 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2751 {
2752         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2753         __disable_cpu_timer_accounting(vcpu);
2754         preempt_enable();
2755 }
2756
2757 /* set the cpu timer - may only be called from the VCPU thread itself */
2758 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2759 {
2760         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2761         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2762         if (vcpu->arch.cputm_enabled)
2763                 vcpu->arch.cputm_start = get_tod_clock_fast();
2764         vcpu->arch.sie_block->cputm = cputm;
2765         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2766         preempt_enable();
2767 }
2768
2769 /* update and get the cpu timer - can also be called from other VCPU threads */
2770 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2771 {
2772         unsigned int seq;
2773         __u64 value;
2774
2775         if (unlikely(!vcpu->arch.cputm_enabled))
2776                 return vcpu->arch.sie_block->cputm;
2777
2778         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2779         do {
2780                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2781                 /*
2782                  * If the writer would ever execute a read in the critical
2783                  * section, e.g. in irq context, we have a deadlock.
2784                  */
2785                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2786                 value = vcpu->arch.sie_block->cputm;
2787                 /* if cputm_start is 0, accounting is being started/stopped */
2788                 if (likely(vcpu->arch.cputm_start))
2789                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2790         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2791         preempt_enable();
2792         return value;
2793 }
2794
2795 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2796 {
2798         gmap_enable(vcpu->arch.enabled_gmap);
2799         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
2800         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2801                 __start_cpu_timer_accounting(vcpu);
2802         vcpu->cpu = cpu;
2803 }
2804
2805 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2806 {
2807         vcpu->cpu = -1;
2808         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2809                 __stop_cpu_timer_accounting(vcpu);
2810         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
2811         vcpu->arch.enabled_gmap = gmap_get_enabled();
2812         gmap_disable(vcpu->arch.enabled_gmap);
2814 }
2815
2816 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2817 {
2818         /* this equals initial cpu reset in pop, but we don't switch to ESA */
2819         vcpu->arch.sie_block->gpsw.mask = 0UL;
2820         vcpu->arch.sie_block->gpsw.addr = 0UL;
2821         kvm_s390_set_prefix(vcpu, 0);
2822         kvm_s390_set_cpu_timer(vcpu, 0);
2823         vcpu->arch.sie_block->ckc       = 0UL;
2824         vcpu->arch.sie_block->todpr     = 0;
2825         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2826         vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
2827                                         CR0_INTERRUPT_KEY_SUBMASK |
2828                                         CR0_MEASUREMENT_ALERT_SUBMASK;
2829         vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
2830                                         CR14_UNUSED_33 |
2831                                         CR14_EXTERNAL_DAMAGE_SUBMASK;
2832         /* make sure the new fpc will be lazily loaded */
2833         save_fpu_regs();
2834         current->thread.fpu.fpc = 0;
2835         vcpu->arch.sie_block->gbea = 1;
2836         vcpu->arch.sie_block->pp = 0;
2837         vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
2838         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2839         kvm_clear_async_pf_completion_queue(vcpu);
2840         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2841                 kvm_s390_vcpu_stop(vcpu);
2842         kvm_s390_clear_local_irqs(vcpu);
2843 }
2844
2845 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2846 {
2847         mutex_lock(&vcpu->kvm->lock);
2848         preempt_disable();
2849         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2850         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
2851         preempt_enable();
2852         mutex_unlock(&vcpu->kvm->lock);
2853         if (!kvm_is_ucontrol(vcpu->kvm)) {
2854                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2855                 sca_add_vcpu(vcpu);
2856         }
2857         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2858                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2859         /* make vcpu_load load the right gmap on the first trigger */
2860         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2861 }
2862
2863 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
2864 {
2865         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
2866             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
2867                 return true;
2868         return false;
2869 }
2870
2871 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
2872 {
2873         /* At least one ECC subfunction must be present */
2874         return kvm_has_pckmo_subfunc(kvm, 32) ||
2875                kvm_has_pckmo_subfunc(kvm, 33) ||
2876                kvm_has_pckmo_subfunc(kvm, 34) ||
2877                kvm_has_pckmo_subfunc(kvm, 40) ||
2878                kvm_has_pckmo_subfunc(kvm, 41);
2880 }
2881
2882 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2883 {
2884         /*
2885          * If the AP instructions are not being interpreted and the MSAX3
2886          * facility is not configured for the guest, there is nothing to set up.
2887          */
2888         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
2889                 return;
2890
2891         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
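             /* clear all crypto controls first; they are re-applied below
              * from the current VM-wide crypto settings
              */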
2892         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2893         vcpu->arch.sie_block->eca &= ~ECA_APIE;
2894         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
2895
2896         if (vcpu->kvm->arch.crypto.apie)
2897                 vcpu->arch.sie_block->eca |= ECA_APIE;
2898
2899         /* Set up protected key support */
2900         if (vcpu->kvm->arch.crypto.aes_kw) {
2901                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2902                 /* ecc is also wrapped with AES key */
2903                 if (kvm_has_pckmo_ecc(vcpu->kvm))
2904                         vcpu->arch.sie_block->ecd |= ECD_ECC;
2905         }
2906
2907         if (vcpu->kvm->arch.crypto.dea_kw)
2908                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2909 }
2910
2911 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2912 {
2913         free_page(vcpu->arch.sie_block->cbrlo);
2914         vcpu->arch.sie_block->cbrlo = 0;
2915 }
2916
2917 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2918 {
2919         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2920         if (!vcpu->arch.sie_block->cbrlo)
2921                 return -ENOMEM;
2922         return 0;
2923 }
2924
2925 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2926 {
2927         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2928
2929         vcpu->arch.sie_block->ibc = model->ibc;
2930         if (test_kvm_facility(vcpu->kvm, 7))
2931                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2932 }
2933
2934 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2935 {
2936         return 0;
2937 }
2938
2939 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
2940 {
2941         int rc = 0;
2942
2943         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2944                                                     CPUSTAT_SM |
2945                                                     CPUSTAT_STOPPED);
2946
2947         if (test_kvm_facility(vcpu->kvm, 78))
2948                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
2949         else if (test_kvm_facility(vcpu->kvm, 8))
2950                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
2951
2952         kvm_s390_vcpu_setup_model(vcpu);
2953
2954         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2955         if (MACHINE_HAS_ESOP)
2956                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2957         if (test_kvm_facility(vcpu->kvm, 9))
2958                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
2959         if (test_kvm_facility(vcpu->kvm, 73))
2960                 vcpu->arch.sie_block->ecb |= ECB_TE;
2961
2962         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
2963                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2964         if (test_kvm_facility(vcpu->kvm, 130))
2965                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2966         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2967         if (sclp.has_cei)
2968                 vcpu->arch.sie_block->eca |= ECA_CEI;
2969         if (sclp.has_ib)
2970                 vcpu->arch.sie_block->eca |= ECA_IB;
2971         if (sclp.has_siif)
2972                 vcpu->arch.sie_block->eca |= ECA_SII;
2973         if (sclp.has_sigpif)
2974                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
2975         if (test_kvm_facility(vcpu->kvm, 129)) {
2976                 vcpu->arch.sie_block->eca |= ECA_VX;
2977                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2978         }
2979         if (test_kvm_facility(vcpu->kvm, 139))
2980                 vcpu->arch.sie_block->ecd |= ECD_MEF;
2981         if (test_kvm_facility(vcpu->kvm, 156))
2982                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
2983         if (vcpu->arch.sie_block->gd) {
2984                 vcpu->arch.sie_block->eca |= ECA_AIV;
2985                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
2986                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
2987         }
2988         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2989                                         | SDNXC;
2990         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2991
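             /* keep storage keys disabled until the guest actually uses
              * them: either via the keyless subset facility or by
              * intercepting the key-handling instructions
              */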
2992         if (sclp.has_kss)
2993                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
2994         else
2995                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2996
2997         if (vcpu->kvm->arch.use_cmma) {
2998                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
2999                 if (rc)
3000                         return rc;
3001         }
3002         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3003         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3004
3005         vcpu->arch.sie_block->hpid = HPID_KVM;
3006
3007         kvm_s390_vcpu_crypto_setup(vcpu);
3008
3009         return rc;
3010 }
3011
3012 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3013 {
3014         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3015                 return -EINVAL;
3016         return 0;
3017 }
3018
3019 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3020 {
3021         struct sie_page *sie_page;
3022         int rc;
3023
3024         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3025         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3026         if (!sie_page)
3027                 return -ENOMEM;
3028
3029         vcpu->arch.sie_block = &sie_page->sie_block;
3030         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3031
3032         /* the real guest size will always be smaller than msl */
3033         vcpu->arch.sie_block->mso = 0;
3034         vcpu->arch.sie_block->msl = sclp.hamax;
3035
3036         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3037         spin_lock_init(&vcpu->arch.local_int.lock);
3038         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3039         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3040                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3041         seqcount_init(&vcpu->arch.cputm_seqcount);
3042
3043         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3044         kvm_clear_async_pf_completion_queue(vcpu);
3045         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3046                                     KVM_SYNC_GPRS |
3047                                     KVM_SYNC_ACRS |
3048                                     KVM_SYNC_CRS |
3049                                     KVM_SYNC_ARCH0 |
3050                                     KVM_SYNC_PFAULT;
3051         kvm_s390_set_prefix(vcpu, 0);
3052         if (test_kvm_facility(vcpu->kvm, 64))
3053                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3054         if (test_kvm_facility(vcpu->kvm, 82))
3055                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3056         if (test_kvm_facility(vcpu->kvm, 133))
3057                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3058         if (test_kvm_facility(vcpu->kvm, 156))
3059                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3060         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3061          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3062          */
3063         if (MACHINE_HAS_VX)
3064                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3065         else
3066                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3067
3068         if (kvm_is_ucontrol(vcpu->kvm)) {
3069                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3070                 if (rc)
3071                         goto out_free_sie_block;
3072         }
3073
3074         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3075                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3076         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3077
3078         rc = kvm_s390_vcpu_setup(vcpu);
3079         if (rc)
3080                 goto out_ucontrol_uninit;
3081         return 0;
3082
3083 out_ucontrol_uninit:
3084         if (kvm_is_ucontrol(vcpu->kvm))
3085                 gmap_remove(vcpu->arch.gmap);
3086 out_free_sie_block:
3087         free_page((unsigned long)(vcpu->arch.sie_block));
3088         return rc;
3089 }
3090
3091 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3092 {
3093         return kvm_s390_vcpu_has_irq(vcpu, 0);
3094 }
3095
3096 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3097 {
3098         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3099 }
3100
3101 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3102 {
3103         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3104         exit_sie(vcpu);
3105 }
3106
3107 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3108 {
3109         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3110 }
3111
3112 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3113 {
3114         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3115         exit_sie(vcpu);
3116 }
3117
3118 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3119 {
3120         return atomic_read(&vcpu->arch.sie_block->prog20) &
3121                (PROG_BLOCK_SIE | PROG_REQUEST);
3122 }
3123
3124 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3125 {
3126         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3127 }
3128
3129 /*
3130  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3131  * If the CPU is not running (e.g. waiting as idle) the function will
3132  * return immediately. */
3133 void exit_sie(struct kvm_vcpu *vcpu)
3134 {
3135         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3136         kvm_s390_vsie_kick(vcpu);
3137         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3138                 cpu_relax();
3139 }
3140
3141 /* Kick a guest cpu out of SIE to process a request synchronously */
3142 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3143 {
3144         kvm_make_request(req, vcpu);
3145         kvm_s390_vcpu_request(vcpu);
3146 }
3147
3148 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3149                               unsigned long end)
3150 {
3151         struct kvm *kvm = gmap->private;
3152         struct kvm_vcpu *vcpu;
3153         unsigned long prefix;
3154         int i;
3155
3156         if (gmap_is_shadow(gmap))
3157                 return;
3158         if (start >= 1UL << 31)
3159                 /* We are only interested in prefix pages */
3160                 return;
3161         kvm_for_each_vcpu(i, vcpu, kvm) {
3162                 /* match against both prefix pages */
3163                 prefix = kvm_s390_get_prefix(vcpu);
3164                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3165                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3166                                    start, end);
3167                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3168                 }
3169         }
3170 }
3171
3172 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3173 {
3174         /* do not poll with more than halt_poll_max_steal percent of steal time */
3175         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3176             halt_poll_max_steal) {
3177                 vcpu->stat.halt_no_poll_steal++;
3178                 return true;
3179         }
3180         return false;
3181 }
3182
3183 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3184 {
3185         /* kvm common code refers to this, but never calls it */
3186         BUG();
3187         return 0;
3188 }
3189
3190 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3191                                            struct kvm_one_reg *reg)
3192 {
3193         int r = -EINVAL;
3194
3195         switch (reg->id) {
3196         case KVM_REG_S390_TODPR:
3197                 r = put_user(vcpu->arch.sie_block->todpr,
3198                              (u32 __user *)reg->addr);
3199                 break;
3200         case KVM_REG_S390_EPOCHDIFF:
3201                 r = put_user(vcpu->arch.sie_block->epoch,
3202                              (u64 __user *)reg->addr);
3203                 break;
3204         case KVM_REG_S390_CPU_TIMER:
3205                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3206                              (u64 __user *)reg->addr);
3207                 break;
3208         case KVM_REG_S390_CLOCK_COMP:
3209                 r = put_user(vcpu->arch.sie_block->ckc,
3210                              (u64 __user *)reg->addr);
3211                 break;
3212         case KVM_REG_S390_PFTOKEN:
3213                 r = put_user(vcpu->arch.pfault_token,
3214                              (u64 __user *)reg->addr);
3215                 break;
3216         case KVM_REG_S390_PFCOMPARE:
3217                 r = put_user(vcpu->arch.pfault_compare,
3218                              (u64 __user *)reg->addr);
3219                 break;
3220         case KVM_REG_S390_PFSELECT:
3221                 r = put_user(vcpu->arch.pfault_select,
3222                              (u64 __user *)reg->addr);
3223                 break;
3224         case KVM_REG_S390_PP:
3225                 r = put_user(vcpu->arch.sie_block->pp,
3226                              (u64 __user *)reg->addr);
3227                 break;
3228         case KVM_REG_S390_GBEA:
3229                 r = put_user(vcpu->arch.sie_block->gbea,
3230                              (u64 __user *)reg->addr);
3231                 break;
3232         default:
3233                 break;
3234         }
3235
3236         return r;
3237 }
3238
3239 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3240                                            struct kvm_one_reg *reg)
3241 {
3242         int r = -EINVAL;
3243         __u64 val;
3244
3245         switch (reg->id) {
3246         case KVM_REG_S390_TODPR:
3247                 r = get_user(vcpu->arch.sie_block->todpr,
3248                              (u32 __user *)reg->addr);
3249                 break;
3250         case KVM_REG_S390_EPOCHDIFF:
3251                 r = get_user(vcpu->arch.sie_block->epoch,
3252                              (u64 __user *)reg->addr);
3253                 break;
3254         case KVM_REG_S390_CPU_TIMER:
3255                 r = get_user(val, (u64 __user *)reg->addr);
3256                 if (!r)
3257                         kvm_s390_set_cpu_timer(vcpu, val);
3258                 break;
3259         case KVM_REG_S390_CLOCK_COMP:
3260                 r = get_user(vcpu->arch.sie_block->ckc,
3261                              (u64 __user *)reg->addr);
3262                 break;
3263         case KVM_REG_S390_PFTOKEN:
3264                 r = get_user(vcpu->arch.pfault_token,
3265                              (u64 __user *)reg->addr);
3266                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3267                         kvm_clear_async_pf_completion_queue(vcpu);
3268                 break;
3269         case KVM_REG_S390_PFCOMPARE:
3270                 r = get_user(vcpu->arch.pfault_compare,
3271                              (u64 __user *)reg->addr);
3272                 break;
3273         case KVM_REG_S390_PFSELECT:
3274                 r = get_user(vcpu->arch.pfault_select,
3275                              (u64 __user *)reg->addr);
3276                 break;
3277         case KVM_REG_S390_PP:
3278                 r = get_user(vcpu->arch.sie_block->pp,
3279                              (u64 __user *)reg->addr);
3280                 break;
3281         case KVM_REG_S390_GBEA:
3282                 r = get_user(vcpu->arch.sie_block->gbea,
3283                              (u64 __user *)reg->addr);
3284                 break;
3285         default:
3286                 break;
3287         }
3288
3289         return r;
3290 }
3291
3292 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3293 {
3294         kvm_s390_vcpu_initial_reset(vcpu);
3295         return 0;
3296 }
3297
3298 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3299 {
3300         vcpu_load(vcpu);
3301         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3302         vcpu_put(vcpu);
3303         return 0;
3304 }
3305
3306 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3307 {
3308         vcpu_load(vcpu);
3309         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3310         vcpu_put(vcpu);
3311         return 0;
3312 }
3313
3314 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3315                                   struct kvm_sregs *sregs)
3316 {
3317         vcpu_load(vcpu);
3318
3319         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3320         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3321
3322         vcpu_put(vcpu);
3323         return 0;
3324 }
3325
3326 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3327                                   struct kvm_sregs *sregs)
3328 {
3329         vcpu_load(vcpu);
3330
3331         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3332         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3333
3334         vcpu_put(vcpu);
3335         return 0;
3336 }
3337
3338 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3339 {
3340         int ret = 0;
3341
3342         vcpu_load(vcpu);
3343
3344         if (test_fp_ctl(fpu->fpc)) {
3345                 ret = -EINVAL;
3346                 goto out;
3347         }
3348         vcpu->run->s.regs.fpc = fpu->fpc;
3349         if (MACHINE_HAS_VX)
3350                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3351                                  (freg_t *) fpu->fprs);
3352         else
3353                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3354
3355 out:
3356         vcpu_put(vcpu);
3357         return ret;
3358 }
3359
3360 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3361 {
3362         vcpu_load(vcpu);
3363
3364         /* make sure we have the latest values */
3365         save_fpu_regs();
3366         if (MACHINE_HAS_VX)
3367                 convert_vx_to_fp((freg_t *) fpu->fprs,
3368                                  (__vector128 *) vcpu->run->s.regs.vrs);
3369         else
3370                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3371         fpu->fpc = vcpu->run->s.regs.fpc;
3372
3373         vcpu_put(vcpu);
3374         return 0;
3375 }
3376
3377 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3378 {
3379         int rc = 0;
3380
3381         if (!is_vcpu_stopped(vcpu))
3382                 rc = -EBUSY;
3383         else {
3384                 vcpu->run->psw_mask = psw.mask;
3385                 vcpu->run->psw_addr = psw.addr;
3386         }
3387         return rc;
3388 }
3389
3390 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3391                                   struct kvm_translation *tr)
3392 {
3393         return -EINVAL; /* not implemented yet */
3394 }
3395
3396 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3397                               KVM_GUESTDBG_USE_HW_BP | \
3398                               KVM_GUESTDBG_ENABLE)
3399
3400 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3401                                         struct kvm_guest_debug *dbg)
3402 {
3403         int rc = 0;
3404
3405         vcpu_load(vcpu);
3406
3407         vcpu->guest_debug = 0;
3408         kvm_s390_clear_bp_data(vcpu);
3409
3410         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3411                 rc = -EINVAL;
3412                 goto out;
3413         }
3414         if (!sclp.has_gpere) {
3415                 rc = -EINVAL;
3416                 goto out;
3417         }
3418
3419         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3420                 vcpu->guest_debug = dbg->control;
3421                 /* enforce guest PER */
3422                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3423
3424                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3425                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3426         } else {
3427                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3428                 vcpu->arch.guestdbg.last_bp = 0;
3429         }
3430
3431         if (rc) {
3432                 vcpu->guest_debug = 0;
3433                 kvm_s390_clear_bp_data(vcpu);
3434                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3435         }
3436
3437 out:
3438         vcpu_put(vcpu);
3439         return rc;
3440 }
3441
3442 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3443                                     struct kvm_mp_state *mp_state)
3444 {
3445         int ret;
3446
3447         vcpu_load(vcpu);
3448
3449         /* CHECK_STOP and LOAD are not supported yet */
3450         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3451                                       KVM_MP_STATE_OPERATING;
3452
3453         vcpu_put(vcpu);
3454         return ret;
3455 }
3456
3457 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3458                                     struct kvm_mp_state *mp_state)
3459 {
3460         int rc = 0;
3461
3462         vcpu_load(vcpu);
3463
3464         /* user space knows about this interface - let it control the state */
3465         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3466
3467         switch (mp_state->mp_state) {
3468         case KVM_MP_STATE_STOPPED:
3469                 kvm_s390_vcpu_stop(vcpu);
3470                 break;
3471         case KVM_MP_STATE_OPERATING:
3472                 kvm_s390_vcpu_start(vcpu);
3473                 break;
3474         case KVM_MP_STATE_LOAD:
3475         case KVM_MP_STATE_CHECK_STOP:
3476                 /* fall through - CHECK_STOP and LOAD are not supported yet */
3477         default:
3478                 rc = -ENXIO;
3479         }
3480
3481         vcpu_put(vcpu);
3482         return rc;
3483 }
3484
3485 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3486 {
3487         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3488 }
3489
3490 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3491 {
3492 retry:
3493         kvm_s390_vcpu_request_handled(vcpu);
3494         if (!kvm_request_pending(vcpu))
3495                 return 0;
3496         /*
3497          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3498          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3499          * This ensures that the ipte instruction for this request has
3500          * already finished. We might race against a second unmapper that
3501          * wants to set the blocking bit. Lets just retry the request loop.
3502          */
3503         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3504                 int rc;
3505                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3506                                           kvm_s390_get_prefix(vcpu),
3507                                           PAGE_SIZE * 2, PROT_WRITE);
3508                 if (rc) {
3509                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3510                         return rc;
3511                 }
3512                 goto retry;
3513         }
3514
3515         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3516                 vcpu->arch.sie_block->ihcpu = 0xffff;
3517                 goto retry;
3518         }
3519
3520         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3521                 if (!ibs_enabled(vcpu)) {
3522                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3523                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3524                 }
3525                 goto retry;
3526         }
3527
3528         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3529                 if (ibs_enabled(vcpu)) {
3530                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3531                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3532                 }
3533                 goto retry;
3534         }
3535
3536         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3537                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3538                 goto retry;
3539         }
3540
3541         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3542                 /*
3543                  * Disable CMM virtualization; we will emulate the ESSA
3544                  * instruction manually, in order to provide additional
3545                  * functionalities needed for live migration.
3546                  */
3547                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3548                 goto retry;
3549         }
3550
3551         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3552                 /*
3553                  * Re-enable CMM virtualization if CMMA is available and
3554                  * CMM has been used.
3555                  */
3556                 if ((vcpu->kvm->arch.use_cmma) &&
3557                     (vcpu->kvm->mm->context.uses_cmm))
3558                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3559                 goto retry;
3560         }
3561
3562         /* nothing to do, just clear the request */
3563         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3564         /* we left the vsie handler, nothing to do, just clear the request */
3565         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3566
3567         return 0;
3568 }
3569
3570 void kvm_s390_set_tod_clock(struct kvm *kvm,
3571                             const struct kvm_s390_vm_tod_clock *gtod)
3572 {
3573         struct kvm_vcpu *vcpu;
3574         struct kvm_s390_tod_clock_ext htod;
3575         int i;
3576
3577         mutex_lock(&kvm->lock);
3578         preempt_disable();
3579
3580         get_tod_clock_ext((char *)&htod);
3581
3582         kvm->arch.epoch = gtod->tod - htod.tod;
3583         kvm->arch.epdx = 0;
3584         if (test_kvm_facility(kvm, 139)) {
3585                 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
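                     /* if the epoch computation above wrapped around, a
                      * borrow has to be taken from the epoch index
                      */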
3586                 if (kvm->arch.epoch > gtod->tod)
3587                         kvm->arch.epdx -= 1;
3588         }
3589
3590         kvm_s390_vcpu_block_all(kvm);
3591         kvm_for_each_vcpu(i, vcpu, kvm) {
3592                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3593                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3594         }
3595
3596         kvm_s390_vcpu_unblock_all(kvm);
3597         preempt_enable();
3598         mutex_unlock(&kvm->lock);
3599 }
3600
3601 /**
3602  * kvm_arch_fault_in_page - fault-in guest page if necessary
3603  * @vcpu: The corresponding virtual cpu
3604  * @gpa: Guest physical address
3605  * @writable: Whether the page should be writable or not
3606  *
3607  * Make sure that a guest page has been faulted-in on the host.
3608  *
3609  * Return: Zero on success, negative error code otherwise.
3610  */
3611 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3612 {
3613         return gmap_fault(vcpu->arch.gmap, gpa,
3614                           writable ? FAULT_FLAG_WRITE : 0);
3615 }
3616
3617 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3618                                       unsigned long token)
3619 {
3620         struct kvm_s390_interrupt inti;
3621         struct kvm_s390_irq irq;
3622
3623         if (start_token) {
3624                 irq.u.ext.ext_params2 = token;
3625                 irq.type = KVM_S390_INT_PFAULT_INIT;
3626                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3627         } else {
3628                 inti.type = KVM_S390_INT_PFAULT_DONE;
3629                 inti.parm64 = token;
3630                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3631         }
3632 }
3633
3634 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3635                                      struct kvm_async_pf *work)
3636 {
3637         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3638         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3639 }
3640
3641 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3642                                  struct kvm_async_pf *work)
3643 {
3644         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3645         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3646 }
3647
3648 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3649                                struct kvm_async_pf *work)
3650 {
3651         /* s390 will always inject the page directly */
3652 }
3653
3654 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3655 {
3656         /*
3657          * s390 will always inject the page directly,
3658          * but we still want kvm_check_async_pf_completion() to clean up
3659          */
3660         return true;
3661 }
3662
3663 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3664 {
3665         hva_t hva;
3666         struct kvm_arch_async_pf arch;
3667         int rc;
3668
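             /* only arm an async pfault if pfault handling is enabled for
              * the guest and the PFAULT_INIT interrupt could be delivered
              * right now
              */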
3669         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3670                 return 0;
3671         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3672             vcpu->arch.pfault_compare)
3673                 return 0;
3674         if (psw_extint_disabled(vcpu))
3675                 return 0;
3676         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3677                 return 0;
3678         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3679                 return 0;
3680         if (!vcpu->arch.gmap->pfault_enabled)
3681                 return 0;
3682
3683         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3684         hva += current->thread.gmap_addr & ~PAGE_MASK;
3685         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3686                 return 0;
3687
3688         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3689         return rc;
3690 }
3691
3692 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3693 {
3694         int rc, cpuflags;
3695
3696         /*
3697          * On s390, notifications for arriving pages will be delivered directly
3698          * to the guest but the housekeeping for completed pfaults is
3699          * handled outside the worker.
3700          */
3701         kvm_check_async_pf_completion(vcpu);
3702
3703         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3704         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3705
3706         if (need_resched())
3707                 schedule();
3708
3709         if (test_cpu_flag(CIF_MCCK_PENDING))
3710                 s390_handle_mcck();
3711
3712         if (!kvm_is_ucontrol(vcpu->kvm)) {
3713                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
3714                 if (rc)
3715                         return rc;
3716         }
3717
3718         rc = kvm_s390_handle_requests(vcpu);
3719         if (rc)
3720                 return rc;
3721
3722         if (guestdbg_enabled(vcpu)) {
3723                 kvm_s390_backup_guest_per_regs(vcpu);
3724                 kvm_s390_patch_guest_per_regs(vcpu);
3725         }
3726
3727         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
3728
3729         vcpu->arch.sie_block->icptcode = 0;
3730         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3731         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3732         trace_kvm_s390_sie_enter(vcpu, cpuflags);
3733
3734         return 0;
3735 }
3736
3737 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3738 {
3739         struct kvm_s390_pgm_info pgm_info = {
3740                 .code = PGM_ADDRESSING,
3741         };
3742         u8 opcode, ilen;
3743         int rc;
3744
3745         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3746         trace_kvm_s390_sie_fault(vcpu);
3747
3748         /*
3749          * We want to inject an addressing exception, which is defined as a
3750          * suppressing or terminating exception. However, since we came here
3751          * by a DAT access exception, the PSW still points to the faulting
3752          * instruction since DAT exceptions are nullifying. So we've got
3753          * to look up the current opcode to get the length of the instruction
3754          * to be able to forward the PSW.
3755          */
3756         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3757         ilen = insn_length(opcode);
3758         if (rc < 0) {
3759                 return rc;
3760         } else if (rc) {
3761                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
3762                  * Forward by arbitrary ilc, injection will take care of
3763                  * nullification if necessary.
3764                  */
3765                 pgm_info = vcpu->arch.pgm;
3766                 ilen = 4;
3767         }
3768         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3769         kvm_s390_forward_psw(vcpu, ilen);
3770         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3771 }
3772
3773 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3774 {
3775         struct mcck_volatile_info *mcck_info;
3776         struct sie_page *sie_page;
3777
3778         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3779                    vcpu->arch.sie_block->icptcode);
3780         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3781
3782         if (guestdbg_enabled(vcpu))
3783                 kvm_s390_restore_guest_per_regs(vcpu);
3784
3785         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3786         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3787
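             /* -EINTR from sie64a means the guest was interrupted by a host
              * machine check; the saved mcck info is reinjected into the
              * guest
              */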
3788         if (exit_reason == -EINTR) {
3789                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
3790                 sie_page = container_of(vcpu->arch.sie_block,
3791                                         struct sie_page, sie_block);
3792                 mcck_info = &sie_page->mcck_info;
3793                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
3794                 return 0;
3795         }
3796
3797         if (vcpu->arch.sie_block->icptcode > 0) {
3798                 int rc = kvm_handle_sie_intercept(vcpu);
3799
3800                 if (rc != -EOPNOTSUPP)
3801                         return rc;
3802                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3803                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3804                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3805                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3806                 return -EREMOTE;
3807         } else if (exit_reason != -EFAULT) {
3808                 vcpu->stat.exit_null++;
3809                 return 0;
3810         } else if (kvm_is_ucontrol(vcpu->kvm)) {
3811                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3812                 vcpu->run->s390_ucontrol.trans_exc_code =
3813                                                 current->thread.gmap_addr;
3814                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
3815                 return -EREMOTE;
3816         } else if (current->thread.gmap_pfault) {
3817                 trace_kvm_s390_major_guest_pfault(vcpu);
3818                 current->thread.gmap_pfault = 0;
3819                 if (kvm_arch_setup_async_pf(vcpu))
3820                         return 0;
3821                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3822         }
3823         return vcpu_post_run_fault_in_sie(vcpu);
3824 }
3825
3826 static int __vcpu_run(struct kvm_vcpu *vcpu)
3827 {
3828         int rc, exit_reason;
3829
3830         /*
3831          * We try to hold kvm->srcu during most of vcpu_run (except when
3832          * running the guest), so that memslots (and other stuff) are protected
3833          */
3834         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3835
3836         do {
3837                 rc = vcpu_pre_run(vcpu);
3838                 if (rc)
3839                         break;
3840
3841                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3842                 /*
3843                  * As PF_VCPU will be used in the fault handler, there
3844                  * must be no uaccess between guest_enter and guest_exit.
3845                  */
3846                 local_irq_disable();
3847                 guest_enter_irqoff();
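                     /* while in SIE the hardware keeps the guest CPU timer
                      * in the SIE block up to date, so suspend the software
                      * accounting
                      */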
3848                 __disable_cpu_timer_accounting(vcpu);
3849                 local_irq_enable();
3850                 exit_reason = sie64a(vcpu->arch.sie_block,
3851                                      vcpu->run->s.regs.gprs);
3852                 local_irq_disable();
3853                 __enable_cpu_timer_accounting(vcpu);
3854                 guest_exit_irqoff();
3855                 local_irq_enable();
3856                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3857
3858                 rc = vcpu_post_run(vcpu, exit_reason);
3859         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3860
3861         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3862         return rc;
3863 }
3864
3865 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3866 {
3867         struct runtime_instr_cb *riccb;
3868         struct gs_cb *gscb;
3869
3870         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3871         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3872         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3873         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3874         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3875                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3876         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3877                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3878                 /* some control register changes require a tlb flush */
3879                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3880         }
3881         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3882                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3883                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3884                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3885                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3886                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3887         }
3888         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3889                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3890                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3891                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3892                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3893                         kvm_clear_async_pf_completion_queue(vcpu);
3894         }
3895         /*
3896          * If userspace sets the riccb (e.g. after migration) to a valid state,
3897          * we should enable RI here instead of doing the lazy enablement.
3898          */
3899         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3900             test_kvm_facility(vcpu->kvm, 64) &&
3901             riccb->v &&
3902             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3903                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3904                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3905         }
3906         /*
3907          * If userspace sets the gscb (e.g. after migration) to non-zero,
3908          * we should enable GS here instead of doing the lazy enablement.
3909          */
3910         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3911             test_kvm_facility(vcpu->kvm, 133) &&
3912             gscb->gssm &&
3913             !vcpu->arch.gs_enabled) {
3914                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3915                 vcpu->arch.sie_block->ecb |= ECB_GS;
3916                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3917                 vcpu->arch.gs_enabled = 1;
3918         }
3919         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
3920             test_kvm_facility(vcpu->kvm, 82)) {
3921                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3922                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
3923         }
3924         save_access_regs(vcpu->arch.host_acrs);
3925         restore_access_regs(vcpu->run->s.regs.acrs);
3926         /* save host (userspace) fprs/vrs */
3927         save_fpu_regs();
3928         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3929         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3930         if (MACHINE_HAS_VX)
3931                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3932         else
3933                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3934         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3935         if (test_fp_ctl(current->thread.fpu.fpc))
3936                 /* User space provided an invalid FPC, let's clear it */
3937                 current->thread.fpu.fpc = 0;
3938         if (MACHINE_HAS_GS) {
3939                 preempt_disable();
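                     /* enable the guarded storage control so the GS control
                      * blocks can be saved and restored below
                      */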
3940                 __ctl_set_bit(2, 4);
3941                 if (current->thread.gs_cb) {
3942                         vcpu->arch.host_gscb = current->thread.gs_cb;
3943                         save_gs_cb(vcpu->arch.host_gscb);
3944                 }
3945                 if (vcpu->arch.gs_enabled) {
3946                         current->thread.gs_cb = (struct gs_cb *)
3947                                                 &vcpu->run->s.regs.gscb;
3948                         restore_gs_cb(current->thread.gs_cb);
3949                 }
3950                 preempt_enable();
3951         }
3952         /* SIE will load etoken directly from SDNX and therefore kvm_run */
3953
3954         kvm_run->kvm_dirty_regs = 0;
3955 }
3956
3957 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3958 {
3959         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3960         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3961         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3962         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3963         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3964         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3965         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3966         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3967         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3968         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3969         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3970         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3971         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
3972         save_access_regs(vcpu->run->s.regs.acrs);
3973         restore_access_regs(vcpu->arch.host_acrs);
3974         /* Save guest register state */
3975         save_fpu_regs();
3976         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3977         /* Restore will be done lazily at return */
3978         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3979         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3980         if (MACHINE_HAS_GS) {
3981                 __ctl_set_bit(2, 4);
3982                 if (vcpu->arch.gs_enabled)
3983                         save_gs_cb(current->thread.gs_cb);
3984                 preempt_disable();
3985                 current->thread.gs_cb = vcpu->arch.host_gscb;
3986                 restore_gs_cb(vcpu->arch.host_gscb);
3987                 preempt_enable();
3988                 if (!vcpu->arch.host_gscb)
3989                         __ctl_clear_bit(2, 4);
3990                 vcpu->arch.host_gscb = NULL;
3991         }
3992         /* SIE will save etoken directly into SDNX and therefore kvm_run */
3993 }
3994
3995 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3996 {
3997         int rc;
3998
3999         if (kvm_run->immediate_exit)
4000                 return -EINTR;
4001
4002         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4003             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4004                 return -EINVAL;
4005
4006         vcpu_load(vcpu);
4007
4008         if (guestdbg_exit_pending(vcpu)) {
4009                 kvm_s390_prepare_debug_exit(vcpu);
4010                 rc = 0;
4011                 goto out;
4012         }
4013
4014         kvm_sigset_activate(vcpu);
4015
4016         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4017                 kvm_s390_vcpu_start(vcpu);
4018         } else if (is_vcpu_stopped(vcpu)) {
4019                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4020                                    vcpu->vcpu_id);
4021                 rc = -EINVAL;
4022                 goto out;
4023         }
4024
4025         sync_regs(vcpu, kvm_run);
4026         enable_cpu_timer_accounting(vcpu);
4027
4028         might_fault();
4029         rc = __vcpu_run(vcpu);
4030
4031         if (signal_pending(current) && !rc) {
4032                 kvm_run->exit_reason = KVM_EXIT_INTR;
4033                 rc = -EINTR;
4034         }
4035
4036         if (guestdbg_exit_pending(vcpu) && !rc)  {
4037                 kvm_s390_prepare_debug_exit(vcpu);
4038                 rc = 0;
4039         }
4040
4041         if (rc == -EREMOTE) {
4042                 /* userspace support is needed, kvm_run has been prepared */
4043                 rc = 0;
4044         }
4045
4046         disable_cpu_timer_accounting(vcpu);
4047         store_regs(vcpu, kvm_run);
4048
4049         kvm_sigset_deactivate(vcpu);
4050
4051         vcpu->stat.exit_userspace++;
4052 out:
4053         vcpu_put(vcpu);
4054         return rc;
4055 }
4056
4057 /*
4058  * store status at address
4059  * we have two special cases:
4060  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4061  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4062  */
4063 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4064 {
4065         unsigned char archmode = 1;
4066         freg_t fprs[NUM_FPRS];
4067         unsigned int px;
4068         u64 clkcomp, cputm;
4069         int rc;
4070
4071         px = kvm_s390_get_prefix(vcpu);
4072         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4073                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4074                         return -EFAULT;
4075                 gpa = 0;
4076         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4077                 if (write_guest_real(vcpu, 163, &archmode, 1))
4078                         return -EFAULT;
4079                 gpa = px;
4080         } else
4081                 gpa -= __LC_FPREGS_SAVE_AREA;
4082
4083         /* manually convert vector registers if necessary */
4084         if (MACHINE_HAS_VX) {
4085                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4086                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4087                                      fprs, 128);
4088         } else {
4089                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4090                                      vcpu->run->s.regs.fprs, 128);
4091         }
4092         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4093                               vcpu->run->s.regs.gprs, 128);
4094         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4095                               &vcpu->arch.sie_block->gpsw, 16);
4096         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4097                               &px, 4);
4098         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4099                               &vcpu->run->s.regs.fpc, 4);
4100         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4101                               &vcpu->arch.sie_block->todpr, 4);
4102         cputm = kvm_s390_get_cpu_timer(vcpu);
4103         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4104                               &cputm, 8);
4105         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4106         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4107                               &clkcomp, 8);
4108         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4109                               &vcpu->run->s.regs.acrs, 64);
4110         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4111                               &vcpu->arch.sie_block->gcr, 128);
4112         return rc ? -EFAULT : 0;
4113 }
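
/*
 * Editorial note: a quick illustration of the two special cases documented
 * above, derived from the code of kvm_s390_store_status_unloaded():
 *
 *	kvm_s390_store_status_unloaded(vcpu, KVM_S390_STORE_STATUS_NOADDR);
 *		writes the architecture mode byte at absolute address 163 and
 *		stores the status starting at absolute __LC_FPREGS_SAVE_AREA
 *		(0x1200)
 *	kvm_s390_store_status_unloaded(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
 *		writes the architecture mode byte at real address 163 and
 *		stores the status into the save areas of the vcpu's prefix page
 */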
4114
4115 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4116 {
4117         /*
4118          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4119          * switch in the run ioctl. Let's update our copies before we save
4120          * them into the save area.
4121          */
4122         save_fpu_regs();
4123         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4124         save_access_regs(vcpu->run->s.regs.acrs);
4125
4126         return kvm_s390_store_status_unloaded(vcpu, addr);
4127 }
4128
4129 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4130 {
4131         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4132         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4133 }
4134
4135 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4136 {
4137         unsigned int i;
4138         struct kvm_vcpu *vcpu;
4139
4140         kvm_for_each_vcpu(i, vcpu, kvm) {
4141                 __disable_ibs_on_vcpu(vcpu);
4142         }
4143 }
4144
4145 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4146 {
4147         if (!sclp.has_ibs)
4148                 return;
4149         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4150         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4151 }
4152
4153 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4154 {
4155         int i, online_vcpus, started_vcpus = 0;
4156
4157         if (!is_vcpu_stopped(vcpu))
4158                 return;
4159
4160         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4161         /* Only one cpu at a time may enter/leave the STOPPED state. */
4162         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4163         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4164
4165         for (i = 0; i < online_vcpus; i++) {
4166                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4167                         started_vcpus++;
4168         }
4169
4170         if (started_vcpus == 0) {
4171                 /* we're the only active VCPU -> speed it up */
4172                 __enable_ibs_on_vcpu(vcpu);
4173         } else if (started_vcpus == 1) {
4174                 /*
4175                  * As we are starting a second VCPU, we have to disable
4176                  * the IBS facility on all VCPUs to remove potentially
4177                  * outstanding ENABLE requests.
4178                  */
4179                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4180         }
4181
4182         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4183         /*
4184          * Another VCPU might have used IBS while we were offline.
4185          * Let's play safe and flush the VCPU at startup.
4186          * Let's play it safe and flush the VCPU at startup.
4187         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4188         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4189         return;
4190 }
4191
4192 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4193 {
4194         int i, online_vcpus, started_vcpus = 0;
4195         struct kvm_vcpu *started_vcpu = NULL;
4196
4197         if (is_vcpu_stopped(vcpu))
4198                 return;
4199
4200         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4201         /* Only one cpu at a time may enter/leave the STOPPED state. */
4202         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4203         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4204
4205         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4206         kvm_s390_clear_stop_irq(vcpu);
4207
4208         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4209         __disable_ibs_on_vcpu(vcpu);
4210
4211         for (i = 0; i < online_vcpus; i++) {
4212                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4213                         started_vcpus++;
4214                         started_vcpu = vcpu->kvm->vcpus[i];
4215                 }
4216         }
4217
4218         if (started_vcpus == 1) {
4219                 /*
4220                  * As we only have one VCPU left, we want to enable the
4221                  * IBS facility for that VCPU to speed it up.
4222                  */
4223                 __enable_ibs_on_vcpu(started_vcpu);
4224         }
4225
4226         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4227         return;
4228 }
4229
4230 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4231                                      struct kvm_enable_cap *cap)
4232 {
4233         int r;
4234
4235         if (cap->flags)
4236                 return -EINVAL;
4237
4238         switch (cap->cap) {
4239         case KVM_CAP_S390_CSS_SUPPORT:
4240                 if (!vcpu->kvm->arch.css_support) {
4241                         vcpu->kvm->arch.css_support = 1;
4242                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4243                         trace_kvm_s390_enable_css(vcpu->kvm);
4244                 }
4245                 r = 0;
4246                 break;
4247         default:
4248                 r = -EINVAL;
4249                 break;
4250         }
4251         return r;
4252 }
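
/*
 * Editorial sketch: enabling the single capability handled above from
 * userspace (vcpu_fd is a placeholder for the vcpu file descriptor):
 *
 *	struct kvm_enable_cap cap = {
 *		.cap = KVM_CAP_S390_CSS_SUPPORT,	// flags and args stay 0
 *	};
 *	ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
 */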
4253
4254 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4255                                   struct kvm_s390_mem_op *mop)
4256 {
4257         void __user *uaddr = (void __user *)mop->buf;
4258         void *tmpbuf = NULL;
4259         int r, srcu_idx;
4260         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4261                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4262
4263         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4264                 return -EINVAL;
4265
4266         if (mop->size > MEM_OP_MAX_SIZE)
4267                 return -E2BIG;
4268
4269         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4270                 tmpbuf = vmalloc(mop->size);
4271                 if (!tmpbuf)
4272                         return -ENOMEM;
4273         }
4274
4275         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4276
4277         switch (mop->op) {
4278         case KVM_S390_MEMOP_LOGICAL_READ:
4279                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4280                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4281                                             mop->size, GACC_FETCH);
4282                         break;
4283                 }
4284                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4285                 if (r == 0) {
4286                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4287                                 r = -EFAULT;
4288                 }
4289                 break;
4290         case KVM_S390_MEMOP_LOGICAL_WRITE:
4291                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4292                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4293                                             mop->size, GACC_STORE);
4294                         break;
4295                 }
4296                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4297                         r = -EFAULT;
4298                         break;
4299                 }
4300                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4301                 break;
4302         default:
4303                 r = -EINVAL;
4304         }
4305
4306         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4307
4308         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4309                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4310
4311         vfree(tmpbuf);
4312         return r;
4313 }
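
/*
 * Editorial sketch: a minimal KVM_S390_MEM_OP invocation from userspace that
 * would reach kvm_s390_guest_mem_op() above; guest_addr, len, local_buf and
 * vcpu_fd are placeholders:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,			// logical guest address
 *		.size  = len,				// 1 .. MEM_OP_MAX_SIZE
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)local_buf,
 *		.ar    = 0,				// access register number
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */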
4314
4315 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4316                                unsigned int ioctl, unsigned long arg)
4317 {
4318         struct kvm_vcpu *vcpu = filp->private_data;
4319         void __user *argp = (void __user *)arg;
4320
4321         switch (ioctl) {
4322         case KVM_S390_IRQ: {
4323                 struct kvm_s390_irq s390irq;
4324
4325                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4326                         return -EFAULT;
4327                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4328         }
4329         case KVM_S390_INTERRUPT: {
4330                 struct kvm_s390_interrupt s390int;
4331                 struct kvm_s390_irq s390irq = {};
4332
4333                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4334                         return -EFAULT;
4335                 if (s390int_to_s390irq(&s390int, &s390irq))
4336                         return -EINVAL;
4337                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4338         }
4339         }
4340         return -ENOIOCTLCMD;
4341 }
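
/*
 * Editorial sketch: injecting an interrupt through the async path above,
 * using a parameterless interrupt type so no union payload is needed
 * (vcpu_fd is a placeholder):
 *
 *	struct kvm_s390_irq irq = { .type = KVM_S390_INT_CLOCK_COMP };
 *	ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
 */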
4342
4343 long kvm_arch_vcpu_ioctl(struct file *filp,
4344                          unsigned int ioctl, unsigned long arg)
4345 {
4346         struct kvm_vcpu *vcpu = filp->private_data;
4347         void __user *argp = (void __user *)arg;
4348         int idx;
4349         long r;
4350
4351         vcpu_load(vcpu);
4352
4353         switch (ioctl) {
4354         case KVM_S390_STORE_STATUS:
4355                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4356                 r = kvm_s390_vcpu_store_status(vcpu, arg);
4357                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4358                 break;
4359         case KVM_S390_SET_INITIAL_PSW: {
4360                 psw_t psw;
4361
4362                 r = -EFAULT;
4363                 if (copy_from_user(&psw, argp, sizeof(psw)))
4364                         break;
4365                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4366                 break;
4367         }
4368         case KVM_S390_INITIAL_RESET:
4369                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4370                 break;
4371         case KVM_SET_ONE_REG:
4372         case KVM_GET_ONE_REG: {
4373                 struct kvm_one_reg reg;
4374                 r = -EFAULT;
4375                 if (copy_from_user(&reg, argp, sizeof(reg)))
4376                         break;
4377                 if (ioctl == KVM_SET_ONE_REG)
4378                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4379                 else
4380                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4381                 break;
4382         }
4383 #ifdef CONFIG_KVM_S390_UCONTROL
4384         case KVM_S390_UCAS_MAP: {
4385                 struct kvm_s390_ucas_mapping ucasmap;
4386
4387                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4388                         r = -EFAULT;
4389                         break;
4390                 }
4391
4392                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4393                         r = -EINVAL;
4394                         break;
4395                 }
4396
4397                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4398                                      ucasmap.vcpu_addr, ucasmap.length);
4399                 break;
4400         }
4401         case KVM_S390_UCAS_UNMAP: {
4402                 struct kvm_s390_ucas_mapping ucasmap;
4403
4404                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4405                         r = -EFAULT;
4406                         break;
4407                 }
4408
4409                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4410                         r = -EINVAL;
4411                         break;
4412                 }
4413
4414                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4415                         ucasmap.length);
4416                 break;
4417         }
4418 #endif
4419         case KVM_S390_VCPU_FAULT: {
4420                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4421                 break;
4422         }
4423         case KVM_ENABLE_CAP:
4424         {
4425                 struct kvm_enable_cap cap;
4426                 r = -EFAULT;
4427                 if (copy_from_user(&cap, argp, sizeof(cap)))
4428                         break;
4429                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4430                 break;
4431         }
4432         case KVM_S390_MEM_OP: {
4433                 struct kvm_s390_mem_op mem_op;
4434
4435                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4436                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
4437                 else
4438                         r = -EFAULT;
4439                 break;
4440         }
4441         case KVM_S390_SET_IRQ_STATE: {
4442                 struct kvm_s390_irq_state irq_state;
4443
4444                 r = -EFAULT;
4445                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4446                         break;
4447                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4448                     irq_state.len == 0 ||
4449                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4450                         r = -EINVAL;
4451                         break;
4452                 }
4453                 /* do not use irq_state.flags, it will break old QEMUs */
4454                 r = kvm_s390_set_irq_state(vcpu,
4455                                            (void __user *) irq_state.buf,
4456                                            irq_state.len);
4457                 break;
4458         }
4459         case KVM_S390_GET_IRQ_STATE: {
4460                 struct kvm_s390_irq_state irq_state;
4461
4462                 r = -EFAULT;
4463                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4464                         break;
4465                 if (irq_state.len == 0) {
4466                         r = -EINVAL;
4467                         break;
4468                 }
4469                 /* do not use irq_state.flags, it will break old QEMUs */
4470                 r = kvm_s390_get_irq_state(vcpu,
4471                                            (__u8 __user *)  irq_state.buf,
4472                                            irq_state.len);
4473                 break;
4474         }
4475         default:
4476                 r = -ENOTTY;
4477         }
4478
4479         vcpu_put(vcpu);
4480         return r;
4481 }
4482
4483 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4484 {
4485 #ifdef CONFIG_KVM_S390_UCONTROL
4486         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4487                  && (kvm_is_ucontrol(vcpu->kvm))) {
4488                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4489                 get_page(vmf->page);
4490                 return 0;
4491         }
4492 #endif
4493         return VM_FAULT_SIGBUS;
4494 }
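
/*
 * Editorial note: for user-controlled VMs the check above lets userspace map
 * the SIE control block by mmap()ing the vcpu fd at the matching page offset,
 * roughly (vcpu_fd and page_size are placeholders):
 *
 *	sie = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		   vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * page_size);
 */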
4495
4496 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
4497                             unsigned long npages)
4498 {
4499         return 0;
4500 }
4501
4502 /* Section: memory related */
4503 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4504                                    struct kvm_memory_slot *memslot,
4505                                    const struct kvm_userspace_memory_region *mem,
4506                                    enum kvm_mr_change change)
4507 {
4508         /* A few sanity checks. Memory slots have to start and end on a
4509            segment boundary (1 MB). The memory backing the slot in userland
4510            may be fragmented across several vmas, and it is fine to mmap()
4511            and munmap() within this slot at any time after this call. */
4512
4513         if (mem->userspace_addr & 0xffffful)
4514                 return -EINVAL;
4515
4516         if (mem->memory_size & 0xffffful)
4517                 return -EINVAL;
4518
4519         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4520                 return -EINVAL;
4521
4522         return 0;
4523 }
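
/*
 * Editorial sketch: a memory region that passes the checks above; addresses
 * and sizes must be 1 MB aligned (vm_fd and backing are placeholders):
 *
 *	struct kvm_userspace_memory_region reg = {
 *		.slot            = 0,
 *		.guest_phys_addr = 0,
 *		.memory_size     = 256UL << 20,		// multiple of 1 MB
 *		.userspace_addr  = (__u64)backing,	// 1 MB aligned
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &reg);
 */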
4524
4525 void kvm_arch_commit_memory_region(struct kvm *kvm,
4526                                 const struct kvm_userspace_memory_region *mem,
4527                                 const struct kvm_memory_slot *old,
4528                                 const struct kvm_memory_slot *new,
4529                                 enum kvm_mr_change change)
4530 {
4531         int rc = 0;
4532
4533         switch (change) {
4534         case KVM_MR_DELETE:
4535                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4536                                         old->npages * PAGE_SIZE);
4537                 break;
4538         case KVM_MR_MOVE:
4539                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
4540                                         old->npages * PAGE_SIZE);
4541                 if (rc)
4542                         break;
4543                 /* FALLTHROUGH */
4544         case KVM_MR_CREATE:
4545                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
4546                                       mem->guest_phys_addr, mem->memory_size);
4547                 break;
4548         case KVM_MR_FLAGS_ONLY:
4549                 break;
4550         default:
4551                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
4552         }
4553         if (rc)
4554                 pr_warn("failed to commit memory region\n");
4555         return;
4556 }
4557
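/*
 * Editorial comment, derived from the arithmetic below: for facility-list
 * doubleword i, two bits of sclp.hmfai select how far the 48-bit mask
 * 0x0000ffffffffffff is shifted right (by 0, 16, 32 or 48 bits), so a larger
 * value leaves fewer of that doubleword's facility bits visible.
 */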
4558 static inline unsigned long nonhyp_mask(int i)
4559 {
4560         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
4561
4562         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
4563 }
4564
4565 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
4566 {
4567         vcpu->valid_wakeup = false;
4568 }
4569
4570 static int __init kvm_s390_init(void)
4571 {
4572         int i;
4573
4574         if (!sclp.has_sief2) {
4575                 pr_info("SIE is not available\n");
4576                 return -ENODEV;
4577         }
4578
4579         if (nested && hpage) {
4580                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
4581                 return -EINVAL;
4582         }
4583
4584         for (i = 0; i < 16; i++)
4585                 kvm_s390_fac_base[i] |=
4586                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
4587
4588         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
4589 }
4590
4591 static void __exit kvm_s390_exit(void)
4592 {
4593         kvm_exit();
4594 }
4595
4596 module_init(kvm_s390_init);
4597 module_exit(kvm_s390_exit);
4598
4599 /*
4600  * Enable autoloading of the kvm module.
4601  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
4602  * since x86 takes a different approach.
4603  */
4604 #include <linux/miscdevice.h>
4605 MODULE_ALIAS_MISCDEV(KVM_MINOR);
4606 MODULE_ALIAS("devname:kvm");