arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2020
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34 #include <linux/pgtable.h>
  35
  36 #include <asm/asm-offsets.h>
  37 #include <asm/lowcore.h>
  38 #include <asm/stp.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include <asm/uv.h>
  48 #include <asm/fpu/api.h>
  49 #include "kvm-s390.h"
  50 #include "gaccess.h"
  51
  52 #define CREATE_TRACE_POINTS
  53 #include "trace.h"
  54 #include "trace-s390.h"
  55
  56 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  57 #define LOCAL_IRQS 32
  58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  59                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  60
  61 struct kvm_stats_debugfs_item debugfs_entries[] = {
  62         VCPU_STAT("userspace_handled", exit_userspace),
  63         VCPU_STAT("exit_null", exit_null),
  64         VCPU_STAT("pfault_sync", pfault_sync),
  65         VCPU_STAT("exit_validity", exit_validity),
  66         VCPU_STAT("exit_stop_request", exit_stop_request),
  67         VCPU_STAT("exit_external_request", exit_external_request),
  68         VCPU_STAT("exit_io_request", exit_io_request),
  69         VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
  70         VCPU_STAT("exit_instruction", exit_instruction),
  71         VCPU_STAT("exit_pei", exit_pei),
  72         VCPU_STAT("exit_program_interruption", exit_program_interruption),
  73         VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
  74         VCPU_STAT("exit_operation_exception", exit_operation_exception),
  75         VCPU_STAT("halt_successful_poll", halt_successful_poll),
  76         VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
  77         VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
  78         VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
  79         VCPU_STAT("halt_wakeup", halt_wakeup),
  80         VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
  81         VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
  82         VCPU_STAT("instruction_lctlg", instruction_lctlg),
  83         VCPU_STAT("instruction_lctl", instruction_lctl),
  84         VCPU_STAT("instruction_stctl", instruction_stctl),
  85         VCPU_STAT("instruction_stctg", instruction_stctg),
  86         VCPU_STAT("deliver_ckc", deliver_ckc),
  87         VCPU_STAT("deliver_cputm", deliver_cputm),
  88         VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
  89         VCPU_STAT("deliver_external_call", deliver_external_call),
  90         VCPU_STAT("deliver_service_signal", deliver_service_signal),
  91         VCPU_STAT("deliver_virtio", deliver_virtio),
  92         VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
  93         VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
  94         VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
  95         VCPU_STAT("deliver_program", deliver_program),
  96         VCPU_STAT("deliver_io", deliver_io),
  97         VCPU_STAT("deliver_machine_check", deliver_machine_check),
  98         VCPU_STAT("exit_wait_state", exit_wait_state),
  99         VCPU_STAT("inject_ckc", inject_ckc),
 100         VCPU_STAT("inject_cputm", inject_cputm),
 101         VCPU_STAT("inject_external_call", inject_external_call),
 102         VM_STAT("inject_float_mchk", inject_float_mchk),
 103         VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
 104         VM_STAT("inject_io", inject_io),
 105         VCPU_STAT("inject_mchk", inject_mchk),
 106         VM_STAT("inject_pfault_done", inject_pfault_done),
 107         VCPU_STAT("inject_program", inject_program),
 108         VCPU_STAT("inject_restart", inject_restart),
 109         VM_STAT("inject_service_signal", inject_service_signal),
 110         VCPU_STAT("inject_set_prefix", inject_set_prefix),
 111         VCPU_STAT("inject_stop_signal", inject_stop_signal),
 112         VCPU_STAT("inject_pfault_init", inject_pfault_init),
 113         VM_STAT("inject_virtio", inject_virtio),
 114         VCPU_STAT("instruction_epsw", instruction_epsw),
 115         VCPU_STAT("instruction_gs", instruction_gs),
 116         VCPU_STAT("instruction_io_other", instruction_io_other),
 117         VCPU_STAT("instruction_lpsw", instruction_lpsw),
 118         VCPU_STAT("instruction_lpswe", instruction_lpswe),
 119         VCPU_STAT("instruction_pfmf", instruction_pfmf),
 120         VCPU_STAT("instruction_ptff", instruction_ptff),
 121         VCPU_STAT("instruction_stidp", instruction_stidp),
 122         VCPU_STAT("instruction_sck", instruction_sck),
 123         VCPU_STAT("instruction_sckpf", instruction_sckpf),
 124         VCPU_STAT("instruction_spx", instruction_spx),
 125         VCPU_STAT("instruction_stpx", instruction_stpx),
 126         VCPU_STAT("instruction_stap", instruction_stap),
 127         VCPU_STAT("instruction_iske", instruction_iske),
 128         VCPU_STAT("instruction_ri", instruction_ri),
 129         VCPU_STAT("instruction_rrbe", instruction_rrbe),
 130         VCPU_STAT("instruction_sske", instruction_sske),
 131         VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
 132         VCPU_STAT("instruction_essa", instruction_essa),
 133         VCPU_STAT("instruction_stsi", instruction_stsi),
 134         VCPU_STAT("instruction_stfl", instruction_stfl),
 135         VCPU_STAT("instruction_tb", instruction_tb),
 136         VCPU_STAT("instruction_tpi", instruction_tpi),
 137         VCPU_STAT("instruction_tprot", instruction_tprot),
 138         VCPU_STAT("instruction_tsch", instruction_tsch),
 139         VCPU_STAT("instruction_sthyi", instruction_sthyi),
 140         VCPU_STAT("instruction_sie", instruction_sie),
 141         VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
 142         VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
 143         VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
 144         VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
 145         VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
 146         VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
 147         VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
 148         VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
 149         VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
 150         VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
 151         VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
 152         VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
 153         VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
 154         VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
 155         VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
 156         VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
 157         VCPU_STAT("instruction_diag_10", diagnose_10),
 158         VCPU_STAT("instruction_diag_44", diagnose_44),
 159         VCPU_STAT("instruction_diag_9c", diagnose_9c),
 160         VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
 161         VCPU_STAT("diag_9c_forward", diagnose_9c_forward),
 162         VCPU_STAT("instruction_diag_258", diagnose_258),
 163         VCPU_STAT("instruction_diag_308", diagnose_308),
 164         VCPU_STAT("instruction_diag_500", diagnose_500),
 165         VCPU_STAT("instruction_diag_other", diagnose_other),
 166         { NULL }
 167 };
 168
 169 /* allow nested virtualization in KVM (if enabled by user space) */
 170 static int nested;
 171 module_param(nested, int, S_IRUGO);
 172 MODULE_PARM_DESC(nested, "Nested virtualization support");
 173
 174 /* allow 1m huge page guest backing, if !nested */
 175 static int hpage;
 176 module_param(hpage, int, 0444);
 177 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 178
 179 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
 180 static u8 halt_poll_max_steal = 10;
 181 module_param(halt_poll_max_steal, byte, 0644);
 182 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 183
 184 /* if set to true, the GISA will be initialized and used if available */
 185 static bool use_gisa  = true;
 186 module_param(use_gisa, bool, 0644);
 187 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
 188
 189 /* maximum diag9c forwarding per second */
 190 unsigned int diag9c_forwarding_hz;
 191 module_param(diag9c_forwarding_hz, uint, 0644);
 192 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 193
 194 /*
 195  * For now we handle at most 16 double words as this is what the s390 base
 196  * kernel handles and stores in the prefix page. If we ever need to go beyond
 197  * this, this requires changes to code, but the external uapi can stay.
 198  */
 199 #define SIZE_INTERNAL 16
 200
 201 /*
 202  * Base feature mask that defines default mask for facilities. Consists of the
 203  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 204  */
 205 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
 206 /*
 207  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 208  * and defines the facilities that can be enabled via a cpu model.
 209  */
 210 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 211
 212 static unsigned long kvm_s390_fac_size(void)
 213 {
 214         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 215         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 216         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 217                 sizeof(S390_lowcore.stfle_fac_list));
 218
 219         return SIZE_INTERNAL;
 220 }
 221
 222 /* available cpu features supported by kvm */
 223 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 224 /* available subfunctions indicated via query / "test bit" */
 225 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 226
 227 static struct gmap_notifier gmap_notifier;
 228 static struct gmap_notifier vsie_gmap_notifier;
 229 debug_info_t *kvm_s390_dbf;
 230 debug_info_t *kvm_s390_dbf_uv;
 231
 232 /* Section: not file related */
 233 int kvm_arch_hardware_enable(void)
 234 {
 235         /* every s390 is virtualization enabled ;-) */
 236         return 0;
 237 }
 238
 239 int kvm_arch_check_processor_compat(void *opaque)
 240 {
 241         return 0;
 242 }
 243
 244 /* forward declarations */
 245 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 246                               unsigned long end);
 247 static int sca_switch_to_extended(struct kvm *kvm);
 248
 249 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 250 {
 251         u8 delta_idx = 0;
 252
 253         /*
 254          * The TOD jumps by delta, we have to compensate this by adding
 255          * -delta to the epoch.
 256          */
 257         delta = -delta;
 258
 259         /* sign-extension - we're adding to signed values below */
 260         if ((s64)delta < 0)
 261                 delta_idx = -1;
 262
 263         scb->epoch += delta;
 264         if (scb->ecd & ECD_MEF) {
 265                 scb->epdx += delta_idx;
 266                 if (scb->epoch < delta)
 267                         scb->epdx += 1;
 268         }
 269 }
 270
 271 /*
 272  * This callback is executed during stop_machine(). All CPUs are therefore
 273  * temporarily stopped. In order not to change guest behavior, we have to
 274  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 275  * so a CPU won't be stopped while calculating with the epoch.
 276  */
 277 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 278                           void *v)
 279 {
 280         struct kvm *kvm;
 281         struct kvm_vcpu *vcpu;
 282         int i;
 283         unsigned long long *delta = v;
 284
 285         list_for_each_entry(kvm, &vm_list, vm_list) {
 286                 kvm_for_each_vcpu(i, vcpu, kvm) {
 287                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 288                         if (i == 0) {
 289                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 290                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 291                         }
 292                         if (vcpu->arch.cputm_enabled)
 293                                 vcpu->arch.cputm_start += *delta;
 294                         if (vcpu->arch.vsie_block)
 295                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 296                                                    *delta);
 297                 }
 298         }
 299         return NOTIFY_OK;
 300 }
 301
 302 static struct notifier_block kvm_clock_notifier = {
 303         .notifier_call = kvm_clock_sync,
 304 };
 305
 306 int kvm_arch_hardware_setup(void *opaque)
 307 {
 308         gmap_notifier.notifier_call = kvm_gmap_notifier;
 309         gmap_register_pte_notifier(&gmap_notifier);
 310         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 311         gmap_register_pte_notifier(&vsie_gmap_notifier);
 312         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 313                                        &kvm_clock_notifier);
 314         return 0;
 315 }
 316
 317 void kvm_arch_hardware_unsetup(void)
 318 {
 319         gmap_unregister_pte_notifier(&gmap_notifier);
 320         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 321         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 322                                          &kvm_clock_notifier);
 323 }
 324
 325 static void allow_cpu_feat(unsigned long nr)
 326 {
 327         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 328 }
 329
 330 static inline int plo_test_bit(unsigned char nr)
 331 {
 332         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 333         int cc;
 334
 335         asm volatile(
 336                 /* Parameter registers are ignored for "test bit" */
 337                 "       plo     0,0,0,0(0)\n"
 338                 "       ipm     %0\n"
 339                 "       srl     %0,28\n"
 340                 : "=d" (cc)
 341                 : "d" (r0)
 342                 : "cc");
 343         return cc == 0;
 344 }
 345
 346 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 347 {
 348         register unsigned long r0 asm("0") = 0; /* query function */
 349         register unsigned long r1 asm("1") = (unsigned long) query;
 350
 351         asm volatile(
 352                 /* Parameter regs are ignored */
 353                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
 354                 :
 355                 : "d" (r0), "a" (r1), [opc] "i" (opcode)
 356                 : "cc", "memory");
 357 }
 358
 359 #define INSN_SORTL 0xb938
 360 #define INSN_DFLTCC 0xb939
 361
 362 static void kvm_s390_cpu_feat_init(void)
 363 {
 364         int i;
 365
 366         for (i = 0; i < 256; ++i) {
 367                 if (plo_test_bit(i))
 368                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 369         }
 370
 371         if (test_facility(28)) /* TOD-clock steering */
 372                 ptff(kvm_s390_available_subfunc.ptff,
 373                      sizeof(kvm_s390_available_subfunc.ptff),
 374                      PTFF_QAF);
 375
 376         if (test_facility(17)) { /* MSA */
 377                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 378                               kvm_s390_available_subfunc.kmac);
 379                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 380                               kvm_s390_available_subfunc.kmc);
 381                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 382                               kvm_s390_available_subfunc.km);
 383                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 384                               kvm_s390_available_subfunc.kimd);
 385                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 386                               kvm_s390_available_subfunc.klmd);
 387         }
 388         if (test_facility(76)) /* MSA3 */
 389                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 390                               kvm_s390_available_subfunc.pckmo);
 391         if (test_facility(77)) { /* MSA4 */
 392                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 393                               kvm_s390_available_subfunc.kmctr);
 394                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 395                               kvm_s390_available_subfunc.kmf);
 396                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 397                               kvm_s390_available_subfunc.kmo);
 398                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 399                               kvm_s390_available_subfunc.pcc);
 400         }
 401         if (test_facility(57)) /* MSA5 */
 402                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 403                               kvm_s390_available_subfunc.ppno);
 404
 405         if (test_facility(146)) /* MSA8 */
 406                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 407                               kvm_s390_available_subfunc.kma);
 408
 409         if (test_facility(155)) /* MSA9 */
 410                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 411                               kvm_s390_available_subfunc.kdsa);
 412
 413         if (test_facility(150)) /* SORTL */
 414                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 415
 416         if (test_facility(151)) /* DFLTCC */
 417                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 418
 419         if (MACHINE_HAS_ESOP)
 420                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 421         /*
 422          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 423          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 424          */
 425         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 426             !test_facility(3) || !nested)
 427                 return;
 428         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 429         if (sclp.has_64bscao)
 430                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 431         if (sclp.has_siif)
 432                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 433         if (sclp.has_gpere)
 434                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 435         if (sclp.has_gsls)
 436                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 437         if (sclp.has_ib)
 438                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 439         if (sclp.has_cei)
 440                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 441         if (sclp.has_ibs)
 442                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 443         if (sclp.has_kss)
 444                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 445         /*
 446          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 447          * all skey handling functions read/set the skey from the PGSTE
 448          * instead of the real storage key.
 449          *
 450          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 451          * pages being detected as preserved although they are resident.
 452          *
 453          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 454          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 455          *
 456          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 457          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 458          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 459          *
 460          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 461          * cannot easily shadow the SCA because of the ipte lock.
 462          */
 463 }
 464
 465 int kvm_arch_init(void *opaque)
 466 {
 467         int rc = -ENOMEM;
 468
 469         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 470         if (!kvm_s390_dbf)
 471                 return -ENOMEM;
 472
 473         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 474         if (!kvm_s390_dbf_uv)
 475                 goto out;
 476
 477         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 478             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 479                 goto out;
 480
 481         kvm_s390_cpu_feat_init();
 482
 483         /* Register floating interrupt controller interface. */
 484         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 485         if (rc) {
 486                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 487                 goto out;
 488         }
 489
 490         rc = kvm_s390_gib_init(GAL_ISC);
 491         if (rc)
 492                 goto out;
 493
 494         return 0;
 495
 496 out:
 497         kvm_arch_exit();
 498         return rc;
 499 }
 500
 501 void kvm_arch_exit(void)
 502 {
 503         kvm_s390_gib_destroy();
 504         debug_unregister(kvm_s390_dbf);
 505         debug_unregister(kvm_s390_dbf_uv);
 506 }
 507
 508 /* Section: device related */
 509 long kvm_arch_dev_ioctl(struct file *filp,
 510                         unsigned int ioctl, unsigned long arg)
 511 {
 512         if (ioctl == KVM_S390_ENABLE_SIE)
 513                 return s390_enable_sie();
 514         return -EINVAL;
 515 }
 516
 517 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 518 {
 519         int r;
 520
 521         switch (ext) {
 522         case KVM_CAP_S390_PSW:
 523         case KVM_CAP_S390_GMAP:
 524         case KVM_CAP_SYNC_MMU:
 525 #ifdef CONFIG_KVM_S390_UCONTROL
 526         case KVM_CAP_S390_UCONTROL:
 527 #endif
 528         case KVM_CAP_ASYNC_PF:
 529         case KVM_CAP_SYNC_REGS:
 530         case KVM_CAP_ONE_REG:
 531         case KVM_CAP_ENABLE_CAP:
 532         case KVM_CAP_S390_CSS_SUPPORT:
 533         case KVM_CAP_IOEVENTFD:
 534         case KVM_CAP_DEVICE_CTRL:
 535         case KVM_CAP_S390_IRQCHIP:
 536         case KVM_CAP_VM_ATTRIBUTES:
 537         case KVM_CAP_MP_STATE:
 538         case KVM_CAP_IMMEDIATE_EXIT:
 539         case KVM_CAP_S390_INJECT_IRQ:
 540         case KVM_CAP_S390_USER_SIGP:
 541         case KVM_CAP_S390_USER_STSI:
 542         case KVM_CAP_S390_SKEYS:
 543         case KVM_CAP_S390_IRQ_STATE:
 544         case KVM_CAP_S390_USER_INSTR0:
 545         case KVM_CAP_S390_CMMA_MIGRATION:
 546         case KVM_CAP_S390_AIS:
 547         case KVM_CAP_S390_AIS_MIGRATION:
 548         case KVM_CAP_S390_VCPU_RESETS:
 549         case KVM_CAP_SET_GUEST_DEBUG:
 550         case KVM_CAP_S390_DIAG318:
 551                 r = 1;
 552                 break;
 553         case KVM_CAP_SET_GUEST_DEBUG2:
 554                 r = KVM_GUESTDBG_VALID_MASK;
 555                 break;
 556         case KVM_CAP_S390_HPAGE_1M:
 557                 r = 0;
 558                 if (hpage && !kvm_is_ucontrol(kvm))
 559                         r = 1;
 560                 break;
 561         case KVM_CAP_S390_MEM_OP:
 562                 r = MEM_OP_MAX_SIZE;
 563                 break;
 564         case KVM_CAP_NR_VCPUS:
 565         case KVM_CAP_MAX_VCPUS:
 566         case KVM_CAP_MAX_VCPU_ID:
 567                 r = KVM_S390_BSCA_CPU_SLOTS;
 568                 if (!kvm_s390_use_sca_entries())
 569                         r = KVM_MAX_VCPUS;
 570                 else if (sclp.has_esca && sclp.has_64bscao)
 571                         r = KVM_S390_ESCA_CPU_SLOTS;
 572                 break;
 573         case KVM_CAP_S390_COW:
 574                 r = MACHINE_HAS_ESOP;
 575                 break;
 576         case KVM_CAP_S390_VECTOR_REGISTERS:
 577                 r = MACHINE_HAS_VX;
 578                 break;
 579         case KVM_CAP_S390_RI:
 580                 r = test_facility(64);
 581                 break;
 582         case KVM_CAP_S390_GS:
 583                 r = test_facility(133);
 584                 break;
 585         case KVM_CAP_S390_BPB:
 586                 r = test_facility(82);
 587                 break;
 588         case KVM_CAP_S390_PROTECTED:
 589                 r = is_prot_virt_host();
 590                 break;
 591         default:
 592                 r = 0;
 593         }
 594         return r;
 595 }
 596
 597 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 598 {
 599         int i;
 600         gfn_t cur_gfn, last_gfn;
 601         unsigned long gaddr, vmaddr;
 602         struct gmap *gmap = kvm->arch.gmap;
 603         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 604
 605         /* Loop over all guest segments */
 606         cur_gfn = memslot->base_gfn;
 607         last_gfn = memslot->base_gfn + memslot->npages;
 608         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 609                 gaddr = gfn_to_gpa(cur_gfn);
 610                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 611                 if (kvm_is_error_hva(vmaddr))
 612                         continue;
 613
 614                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 615                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 616                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 617                         if (test_bit(i, bitmap))
 618                                 mark_page_dirty(kvm, cur_gfn + i);
 619                 }
 620
 621                 if (fatal_signal_pending(current))
 622                         return;
 623                 cond_resched();
 624         }
 625 }
 626
 627 /* Section: vm related */
 628 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 629
 630 /*
 631  * Get (and clear) the dirty memory log for a memory slot.
 632  */
 633 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 634                                struct kvm_dirty_log *log)
 635 {
 636         int r;
 637         unsigned long n;
 638         struct kvm_memory_slot *memslot;
 639         int is_dirty;
 640
 641         if (kvm_is_ucontrol(kvm))
 642                 return -EINVAL;
 643
 644         mutex_lock(&kvm->slots_lock);
 645
 646         r = -EINVAL;
 647         if (log->slot >= KVM_USER_MEM_SLOTS)
 648                 goto out;
 649
 650         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 651         if (r)
 652                 goto out;
 653
 654         /* Clear the dirty log */
 655         if (is_dirty) {
 656                 n = kvm_dirty_bitmap_bytes(memslot);
 657                 memset(memslot->dirty_bitmap, 0, n);
 658         }
 659         r = 0;
 660 out:
 661         mutex_unlock(&kvm->slots_lock);
 662         return r;
 663 }
 664
 665 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 666 {
 667         unsigned int i;
 668         struct kvm_vcpu *vcpu;
 669
 670         kvm_for_each_vcpu(i, vcpu, kvm) {
 671                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 672         }
 673 }
 674
 675 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 676 {
 677         int r;
 678
 679         if (cap->flags)
 680                 return -EINVAL;
 681
 682         switch (cap->cap) {
 683         case KVM_CAP_S390_IRQCHIP:
 684                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 685                 kvm->arch.use_irqchip = 1;
 686                 r = 0;
 687                 break;
 688         case KVM_CAP_S390_USER_SIGP:
 689                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 690                 kvm->arch.user_sigp = 1;
 691                 r = 0;
 692                 break;
 693         case KVM_CAP_S390_VECTOR_REGISTERS:
 694                 mutex_lock(&kvm->lock);
 695                 if (kvm->created_vcpus) {
 696                         r = -EBUSY;
 697                 } else if (MACHINE_HAS_VX) {
 698                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 699                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 700                         if (test_facility(134)) {
 701                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 702                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 703                         }
 704                         if (test_facility(135)) {
 705                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 706                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 707                         }
 708                         if (test_facility(148)) {
 709                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 710                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 711                         }
 712                         if (test_facility(152)) {
 713                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 714                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 715                         }
 716                         r = 0;
 717                 } else
 718                         r = -EINVAL;
 719                 mutex_unlock(&kvm->lock);
 720                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 721                          r ? "(not available)" : "(success)");
 722                 break;
 723         case KVM_CAP_S390_RI:
 724                 r = -EINVAL;
 725                 mutex_lock(&kvm->lock);
 726                 if (kvm->created_vcpus) {
 727                         r = -EBUSY;
 728                 } else if (test_facility(64)) {
 729                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 730                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 731                         r = 0;
 732                 }
 733                 mutex_unlock(&kvm->lock);
 734                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 735                          r ? "(not available)" : "(success)");
 736                 break;
 737         case KVM_CAP_S390_AIS:
 738                 mutex_lock(&kvm->lock);
 739                 if (kvm->created_vcpus) {
 740                         r = -EBUSY;
 741                 } else {
 742                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 743                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 744                         r = 0;
 745                 }
 746                 mutex_unlock(&kvm->lock);
 747                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 748                          r ? "(not available)" : "(success)");
 749                 break;
 750         case KVM_CAP_S390_GS:
 751                 r = -EINVAL;
 752                 mutex_lock(&kvm->lock);
 753                 if (kvm->created_vcpus) {
 754                         r = -EBUSY;
 755                 } else if (test_facility(133)) {
 756                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 757                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 758                         r = 0;
 759                 }
 760                 mutex_unlock(&kvm->lock);
 761                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 762                          r ? "(not available)" : "(success)");
 763                 break;
 764         case KVM_CAP_S390_HPAGE_1M:
 765                 mutex_lock(&kvm->lock);
 766                 if (kvm->created_vcpus)
 767                         r = -EBUSY;
 768                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 769                         r = -EINVAL;
 770                 else {
 771                         r = 0;
 772                         mmap_write_lock(kvm->mm);
 773                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 774                         mmap_write_unlock(kvm->mm);
 775                         /*
 776                          * We might have to create fake 4k page
 777                          * tables. To avoid that the hardware works on
 778                          * stale PGSTEs, we emulate these instructions.
 779                          */
 780                         kvm->arch.use_skf = 0;
 781                         kvm->arch.use_pfmfi = 0;
 782                 }
 783                 mutex_unlock(&kvm->lock);
 784                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 785                          r ? "(not available)" : "(success)");
 786                 break;
 787         case KVM_CAP_S390_USER_STSI:
 788                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 789                 kvm->arch.user_stsi = 1;
 790                 r = 0;
 791                 break;
 792         case KVM_CAP_S390_USER_INSTR0:
 793                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 794                 kvm->arch.user_instr0 = 1;
 795                 icpt_operexc_on_all_vcpus(kvm);
 796                 r = 0;
 797                 break;
 798         default:
 799                 r = -EINVAL;
 800                 break;
 801         }
 802         return r;
 803 }
 804
 805 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 806 {
 807         int ret;
 808
 809         switch (attr->attr) {
 810         case KVM_S390_VM_MEM_LIMIT_SIZE:
 811                 ret = 0;
 812                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 813                          kvm->arch.mem_limit);
 814                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 815                         ret = -EFAULT;
 816                 break;
 817         default:
 818                 ret = -ENXIO;
 819                 break;
 820         }
 821         return ret;
 822 }
 823
 824 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 825 {
 826         int ret;
 827         unsigned int idx;
 828         switch (attr->attr) {
 829         case KVM_S390_VM_MEM_ENABLE_CMMA:
 830                 ret = -ENXIO;
 831                 if (!sclp.has_cmma)
 832                         break;
 833
 834                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 835                 mutex_lock(&kvm->lock);
 836                 if (kvm->created_vcpus)
 837                         ret = -EBUSY;
 838                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 839                         ret = -EINVAL;
 840                 else {
 841                         kvm->arch.use_cmma = 1;
 842                         /* Not compatible with cmma. */
 843                         kvm->arch.use_pfmfi = 0;
 844                         ret = 0;
 845                 }
 846                 mutex_unlock(&kvm->lock);
 847                 break;
 848         case KVM_S390_VM_MEM_CLR_CMMA:
 849                 ret = -ENXIO;
 850                 if (!sclp.has_cmma)
 851                         break;
 852                 ret = -EINVAL;
 853                 if (!kvm->arch.use_cmma)
 854                         break;
 855
 856                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 857                 mutex_lock(&kvm->lock);
 858                 idx = srcu_read_lock(&kvm->srcu);
 859                 s390_reset_cmma(kvm->arch.gmap->mm);
 860                 srcu_read_unlock(&kvm->srcu, idx);
 861                 mutex_unlock(&kvm->lock);
 862                 ret = 0;
 863                 break;
 864         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 865                 unsigned long new_limit;
 866
 867                 if (kvm_is_ucontrol(kvm))
 868                         return -EINVAL;
 869
 870                 if (get_user(new_limit, (u64 __user *)attr->addr))
 871                         return -EFAULT;
 872
 873                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 874                     new_limit > kvm->arch.mem_limit)
 875                         return -E2BIG;
 876
 877                 if (!new_limit)
 878                         return -EINVAL;
 879
 880                 /* gmap_create takes last usable address */
 881                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 882                         new_limit -= 1;
 883
 884                 ret = -EBUSY;
 885                 mutex_lock(&kvm->lock);
 886                 if (!kvm->created_vcpus) {
 887                         /* gmap_create will round the limit up */
 888                         struct gmap *new = gmap_create(current->mm, new_limit);
 889
 890                         if (!new) {
 891                                 ret = -ENOMEM;
 892                         } else {
 893                                 gmap_remove(kvm->arch.gmap);
 894                                 new->private = kvm;
 895                                 kvm->arch.gmap = new;
 896                                 ret = 0;
 897                         }
 898                 }
 899                 mutex_unlock(&kvm->lock);
 900                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 901                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 902                          (void *) kvm->arch.gmap->asce);
 903                 break;
 904         }
 905         default:
 906                 ret = -ENXIO;
 907                 break;
 908         }
 909         return ret;
 910 }
 911
 912 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 913
 914 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 915 {
 916         struct kvm_vcpu *vcpu;
 917         int i;
 918
 919         kvm_s390_vcpu_block_all(kvm);
 920
 921         kvm_for_each_vcpu(i, vcpu, kvm) {
 922                 kvm_s390_vcpu_crypto_setup(vcpu);
 923                 /* recreate the shadow crycb by leaving the VSIE handler */
 924                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 925         }
 926
 927         kvm_s390_vcpu_unblock_all(kvm);
 928 }
 929
 930 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 931 {
 932         mutex_lock(&kvm->lock);
 933         switch (attr->attr) {
 934         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 935                 if (!test_kvm_facility(kvm, 76)) {
 936                         mutex_unlock(&kvm->lock);
 937                         return -EINVAL;
 938                 }
 939                 get_random_bytes(
 940                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 941                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 942                 kvm->arch.crypto.aes_kw = 1;
 943                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 944                 break;
 945         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 946                 if (!test_kvm_facility(kvm, 76)) {
 947                         mutex_unlock(&kvm->lock);
 948                         return -EINVAL;
 949                 }
 950                 get_random_bytes(
 951                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 952                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 953                 kvm->arch.crypto.dea_kw = 1;
 954                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 955                 break;
 956         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 957                 if (!test_kvm_facility(kvm, 76)) {
 958                         mutex_unlock(&kvm->lock);
 959                         return -EINVAL;
 960                 }
 961                 kvm->arch.crypto.aes_kw = 0;
 962                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 963                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 964                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 965                 break;
 966         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 967                 if (!test_kvm_facility(kvm, 76)) {
 968                         mutex_unlock(&kvm->lock);
 969                         return -EINVAL;
 970                 }
 971                 kvm->arch.crypto.dea_kw = 0;
 972                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 973                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 974                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 975                 break;
 976         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 977                 if (!ap_instructions_available()) {
 978                         mutex_unlock(&kvm->lock);
 979                         return -EOPNOTSUPP;
 980                 }
 981                 kvm->arch.crypto.apie = 1;
 982                 break;
 983         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
 984                 if (!ap_instructions_available()) {
 985                         mutex_unlock(&kvm->lock);
 986                         return -EOPNOTSUPP;
 987                 }
 988                 kvm->arch.crypto.apie = 0;
 989                 break;
 990         default:
 991                 mutex_unlock(&kvm->lock);
 992                 return -ENXIO;
 993         }
 994
 995         kvm_s390_vcpu_crypto_reset_all(kvm);
 996         mutex_unlock(&kvm->lock);
 997         return 0;
 998 }
 999
1000 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1001 {
1002         int cx;
1003         struct kvm_vcpu *vcpu;
1004
1005         kvm_for_each_vcpu(cx, vcpu, kvm)
1006                 kvm_s390_sync_request(req, vcpu);
1007 }
1008
1009 /*
1010  * Must be called with kvm->srcu held to avoid races on memslots, and with
1011  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1012  */
1013 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1014 {
1015         struct kvm_memory_slot *ms;
1016         struct kvm_memslots *slots;
1017         unsigned long ram_pages = 0;
1018         int slotnr;
1019
1020         /* migration mode already enabled */
1021         if (kvm->arch.migration_mode)
1022                 return 0;
1023         slots = kvm_memslots(kvm);
1024         if (!slots || !slots->used_slots)
1025                 return -EINVAL;
1026
1027         if (!kvm->arch.use_cmma) {
1028                 kvm->arch.migration_mode = 1;
1029                 return 0;
1030         }
1031         /* mark all the pages in active slots as dirty */
1032         for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
1033                 ms = slots->memslots + slotnr;
1034                 if (!ms->dirty_bitmap)
1035                         return -EINVAL;
1036                 /*
1037                  * The second half of the bitmap is only used on x86,
1038                  * and would be wasted otherwise, so we put it to good
1039                  * use here to keep track of the state of the storage
1040                  * attributes.
1041                  */
1042                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1043                 ram_pages += ms->npages;
1044         }
1045         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1046         kvm->arch.migration_mode = 1;
1047         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1048         return 0;
1049 }
1050
1051 /*
1052  * Must be called with kvm->slots_lock to avoid races with ourselves and
1053  * kvm_s390_vm_start_migration.
1054  */
1055 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1056 {
1057         /* migration mode already disabled */
1058         if (!kvm->arch.migration_mode)
1059                 return 0;
1060         kvm->arch.migration_mode = 0;
1061         if (kvm->arch.use_cmma)
1062                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1063         return 0;
1064 }
1065
1066 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1067                                      struct kvm_device_attr *attr)
1068 {
1069         int res = -ENXIO;
1070
1071         mutex_lock(&kvm->slots_lock);
1072         switch (attr->attr) {
1073         case KVM_S390_VM_MIGRATION_START:
1074                 res = kvm_s390_vm_start_migration(kvm);
1075                 break;
1076         case KVM_S390_VM_MIGRATION_STOP:
1077                 res = kvm_s390_vm_stop_migration(kvm);
1078                 break;
1079         default:
1080                 break;
1081         }
1082         mutex_unlock(&kvm->slots_lock);
1083
1084         return res;
1085 }
1086
1087 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1088                                      struct kvm_device_attr *attr)
1089 {
1090         u64 mig = kvm->arch.migration_mode;
1091
1092         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1093                 return -ENXIO;
1094
1095         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1096                 return -EFAULT;
1097         return 0;
1098 }
1099
1100 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1101 {
1102         struct kvm_s390_vm_tod_clock gtod;
1103
1104         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1105                 return -EFAULT;
1106
1107         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1108                 return -EINVAL;
1109         kvm_s390_set_tod_clock(kvm, &gtod);
1110
1111         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1112                 gtod.epoch_idx, gtod.tod);
1113
1114         return 0;
1115 }
1116
1117 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1118 {
1119         u8 gtod_high;
1120
1121         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1122                                            sizeof(gtod_high)))
1123                 return -EFAULT;
1124
1125         if (gtod_high != 0)
1126                 return -EINVAL;
1127         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1128
1129         return 0;
1130 }
1131
1132 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1133 {
1134         struct kvm_s390_vm_tod_clock gtod = { 0 };
1135
1136         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1137                            sizeof(gtod.tod)))
1138                 return -EFAULT;
1139
1140         kvm_s390_set_tod_clock(kvm, &gtod);
1141         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1142         return 0;
1143 }
1144
1145 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1146 {
1147         int ret;
1148
1149         if (attr->flags)
1150                 return -EINVAL;
1151
1152         switch (attr->attr) {
1153         case KVM_S390_VM_TOD_EXT:
1154                 ret = kvm_s390_set_tod_ext(kvm, attr);
1155                 break;
1156         case KVM_S390_VM_TOD_HIGH:
1157                 ret = kvm_s390_set_tod_high(kvm, attr);
1158                 break;
1159         case KVM_S390_VM_TOD_LOW:
1160                 ret = kvm_s390_set_tod_low(kvm, attr);
1161                 break;
1162         default:
1163                 ret = -ENXIO;
1164                 break;
1165         }
1166         return ret;
1167 }
1168
1169 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1170                                    struct kvm_s390_vm_tod_clock *gtod)
1171 {
1172         union tod_clock clk;
1173
1174         preempt_disable();
1175
1176         store_tod_clock_ext(&clk);
1177
1178         gtod->tod = clk.tod + kvm->arch.epoch;
1179         gtod->epoch_idx = 0;
1180         if (test_kvm_facility(kvm, 139)) {
1181                 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1182                 if (gtod->tod < clk.tod)
1183                         gtod->epoch_idx += 1;
1184         }
1185
1186         preempt_enable();
1187 }
1188
1189 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1190 {
1191         struct kvm_s390_vm_tod_clock gtod;
1192
1193         memset(&gtod, 0, sizeof(gtod));
1194         kvm_s390_get_tod_clock(kvm, &gtod);
1195         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1196                 return -EFAULT;
1197
1198         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1199                 gtod.epoch_idx, gtod.tod);
1200         return 0;
1201 }
1202
1203 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1204 {
1205         u8 gtod_high = 0;
1206
1207         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1208                                          sizeof(gtod_high)))
1209                 return -EFAULT;
1210         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1211
1212         return 0;
1213 }
1214
1215 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1216 {
1217         u64 gtod;
1218
1219         gtod = kvm_s390_get_tod_clock_fast(kvm);
1220         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1221                 return -EFAULT;
1222         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1223
1224         return 0;
1225 }
1226
1227 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1228 {
1229         int ret;
1230
1231         if (attr->flags)
1232                 return -EINVAL;
1233
1234         switch (attr->attr) {
1235         case KVM_S390_VM_TOD_EXT:
1236                 ret = kvm_s390_get_tod_ext(kvm, attr);
1237                 break;
1238         case KVM_S390_VM_TOD_HIGH:
1239                 ret = kvm_s390_get_tod_high(kvm, attr);
1240                 break;
1241         case KVM_S390_VM_TOD_LOW:
1242                 ret = kvm_s390_get_tod_low(kvm, attr);
1243                 break;
1244         default:
1245                 ret = -ENXIO;
1246                 break;
1247         }
1248         return ret;
1249 }
1250
1251 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1252 {
1253         struct kvm_s390_vm_cpu_processor *proc;
1254         u16 lowest_ibc, unblocked_ibc;
1255         int ret = 0;
1256
1257         mutex_lock(&kvm->lock);
1258         if (kvm->created_vcpus) {
1259                 ret = -EBUSY;
1260                 goto out;
1261         }
1262         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1263         if (!proc) {
1264                 ret = -ENOMEM;
1265                 goto out;
1266         }
1267         if (!copy_from_user(proc, (void __user *)attr->addr,
1268                             sizeof(*proc))) {
1269                 kvm->arch.model.cpuid = proc->cpuid;
1270                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1271                 unblocked_ibc = sclp.ibc & 0xfff;
1272                 if (lowest_ibc && proc->ibc) {
1273                         if (proc->ibc > unblocked_ibc)
1274                                 kvm->arch.model.ibc = unblocked_ibc;
1275                         else if (proc->ibc < lowest_ibc)
1276                                 kvm->arch.model.ibc = lowest_ibc;
1277                         else
1278                                 kvm->arch.model.ibc = proc->ibc;
1279                 }
1280                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1281                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1282                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1283                          kvm->arch.model.ibc,
1284                          kvm->arch.model.cpuid);
1285                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1286                          kvm->arch.model.fac_list[0],
1287                          kvm->arch.model.fac_list[1],
1288                          kvm->arch.model.fac_list[2]);
1289         } else
1290                 ret = -EFAULT;
1291         kfree(proc);
1292 out:
1293         mutex_unlock(&kvm->lock);
1294         return ret;
1295 }
1296
1297 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1298                                        struct kvm_device_attr *attr)
1299 {
1300         struct kvm_s390_vm_cpu_feat data;
1301
1302         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1303                 return -EFAULT;
1304         if (!bitmap_subset((unsigned long *) data.feat,
1305                            kvm_s390_available_cpu_feat,
1306                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1307                 return -EINVAL;
1308
1309         mutex_lock(&kvm->lock);
1310         if (kvm->created_vcpus) {
1311                 mutex_unlock(&kvm->lock);
1312                 return -EBUSY;
1313         }
1314         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1315                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1316         mutex_unlock(&kvm->lock);
1317         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1318                          data.feat[0],
1319                          data.feat[1],
1320                          data.feat[2]);
1321         return 0;
1322 }
1323
1324 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1325                                           struct kvm_device_attr *attr)
1326 {
1327         mutex_lock(&kvm->lock);
1328         if (kvm->created_vcpus) {
1329                 mutex_unlock(&kvm->lock);
1330                 return -EBUSY;
1331         }
1332
1333         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1334                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1335                 mutex_unlock(&kvm->lock);
1336                 return -EFAULT;
1337         }
1338         mutex_unlock(&kvm->lock);
1339
1340         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1341                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1342                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1343                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1344                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1345         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1346                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1347                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1348         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1349                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1350                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1351         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1352                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1353                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1354         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1355                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1356                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1357         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1358                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1359                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1360         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1361                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1363         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1366         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1369         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1375         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1378         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1381         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1384         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1387         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1390                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1392         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1393                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1394                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1395                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1396                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1397
1398         return 0;
1399 }
1400
1401 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1402 {
1403         int ret = -ENXIO;
1404
1405         switch (attr->attr) {
1406         case KVM_S390_VM_CPU_PROCESSOR:
1407                 ret = kvm_s390_set_processor(kvm, attr);
1408                 break;
1409         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1410                 ret = kvm_s390_set_processor_feat(kvm, attr);
1411                 break;
1412         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1413                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1414                 break;
1415         }
1416         return ret;
1417 }
1418
1419 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1420 {
1421         struct kvm_s390_vm_cpu_processor *proc;
1422         int ret = 0;
1423
1424         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1425         if (!proc) {
1426                 ret = -ENOMEM;
1427                 goto out;
1428         }
1429         proc->cpuid = kvm->arch.model.cpuid;
1430         proc->ibc = kvm->arch.model.ibc;
1431         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1432                S390_ARCH_FAC_LIST_SIZE_BYTE);
1433         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1434                  kvm->arch.model.ibc,
1435                  kvm->arch.model.cpuid);
1436         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1437                  kvm->arch.model.fac_list[0],
1438                  kvm->arch.model.fac_list[1],
1439                  kvm->arch.model.fac_list[2]);
1440         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1441                 ret = -EFAULT;
1442         kfree(proc);
1443 out:
1444         return ret;
1445 }
1446
1447 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1448 {
1449         struct kvm_s390_vm_cpu_machine *mach;
1450         int ret = 0;
1451
1452         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1453         if (!mach) {
1454                 ret = -ENOMEM;
1455                 goto out;
1456         }
1457         get_cpu_id((struct cpuid *) &mach->cpuid);
1458         mach->ibc = sclp.ibc;
1459         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1460                S390_ARCH_FAC_LIST_SIZE_BYTE);
1461         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1462                sizeof(S390_lowcore.stfle_fac_list));
1463         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1464                  kvm->arch.model.ibc,
1465                  kvm->arch.model.cpuid);
1466         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1467                  mach->fac_mask[0],
1468                  mach->fac_mask[1],
1469                  mach->fac_mask[2]);
1470         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1471                  mach->fac_list[0],
1472                  mach->fac_list[1],
1473                  mach->fac_list[2]);
1474         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1475                 ret = -EFAULT;
1476         kfree(mach);
1477 out:
1478         return ret;
1479 }
1480
1481 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1482                                        struct kvm_device_attr *attr)
1483 {
1484         struct kvm_s390_vm_cpu_feat data;
1485
1486         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1487                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1488         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1489                 return -EFAULT;
1490         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1491                          data.feat[0],
1492                          data.feat[1],
1493                          data.feat[2]);
1494         return 0;
1495 }
1496
1497 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1498                                      struct kvm_device_attr *attr)
1499 {
1500         struct kvm_s390_vm_cpu_feat data;
1501
1502         bitmap_copy((unsigned long *) data.feat,
1503                     kvm_s390_available_cpu_feat,
1504                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1505         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1506                 return -EFAULT;
1507         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1508                          data.feat[0],
1509                          data.feat[1],
1510                          data.feat[2]);
1511         return 0;
1512 }
1513
1514 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1515                                           struct kvm_device_attr *attr)
1516 {
1517         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1518             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1519                 return -EFAULT;
1520
1521         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1522                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1523                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1524                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1525                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1526         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1527                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1528                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1529         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1530                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1531                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1532         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1533                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1534                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1535         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1536                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1537                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1538         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1539                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1540                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1541         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1542                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1544         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1547         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1550         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1556         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1559         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1562         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1565         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1568         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1571                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1573         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1574                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1575                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1576                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1577                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1578
1579         return 0;
1580 }
1581
1582 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1583                                         struct kvm_device_attr *attr)
1584 {
1585         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1586             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1587                 return -EFAULT;
1588
1589         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1591                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1592                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1593                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1594         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1596                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1597         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1598                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1599                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1600         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1601                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1602                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1603         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1604                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1605                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1606         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1607                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1608                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1609         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1610                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1611                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1612         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1613                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1615         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1618         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1624         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1625                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1626                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1627         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1628                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1630         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1631                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1632                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1633         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1634                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1635                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1636         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1637                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1639                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1640                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1641         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1642                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1643                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1644                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1645                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1646
1647         return 0;
1648 }
1649
1650 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1651 {
1652         int ret = -ENXIO;
1653
1654         switch (attr->attr) {
1655         case KVM_S390_VM_CPU_PROCESSOR:
1656                 ret = kvm_s390_get_processor(kvm, attr);
1657                 break;
1658         case KVM_S390_VM_CPU_MACHINE:
1659                 ret = kvm_s390_get_machine(kvm, attr);
1660                 break;
1661         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1662                 ret = kvm_s390_get_processor_feat(kvm, attr);
1663                 break;
1664         case KVM_S390_VM_CPU_MACHINE_FEAT:
1665                 ret = kvm_s390_get_machine_feat(kvm, attr);
1666                 break;
1667         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1668                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1669                 break;
1670         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1671                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1672                 break;
1673         }
1674         return ret;
1675 }
1676
1677 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1678 {
1679         int ret;
1680
1681         switch (attr->group) {
1682         case KVM_S390_VM_MEM_CTRL:
1683                 ret = kvm_s390_set_mem_control(kvm, attr);
1684                 break;
1685         case KVM_S390_VM_TOD:
1686                 ret = kvm_s390_set_tod(kvm, attr);
1687                 break;
1688         case KVM_S390_VM_CPU_MODEL:
1689                 ret = kvm_s390_set_cpu_model(kvm, attr);
1690                 break;
1691         case KVM_S390_VM_CRYPTO:
1692                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1693                 break;
1694         case KVM_S390_VM_MIGRATION:
1695                 ret = kvm_s390_vm_set_migration(kvm, attr);
1696                 break;
1697         default:
1698                 ret = -ENXIO;
1699                 break;
1700         }
1701
1702         return ret;
1703 }
1704
1705 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1706 {
1707         int ret;
1708
1709         switch (attr->group) {
1710         case KVM_S390_VM_MEM_CTRL:
1711                 ret = kvm_s390_get_mem_control(kvm, attr);
1712                 break;
1713         case KVM_S390_VM_TOD:
1714                 ret = kvm_s390_get_tod(kvm, attr);
1715                 break;
1716         case KVM_S390_VM_CPU_MODEL:
1717                 ret = kvm_s390_get_cpu_model(kvm, attr);
1718                 break;
1719         case KVM_S390_VM_MIGRATION:
1720                 ret = kvm_s390_vm_get_migration(kvm, attr);
1721                 break;
1722         default:
1723                 ret = -ENXIO;
1724                 break;
1725         }
1726
1727         return ret;
1728 }
1729
1730 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1731 {
1732         int ret;
1733
1734         switch (attr->group) {
1735         case KVM_S390_VM_MEM_CTRL:
1736                 switch (attr->attr) {
1737                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1738                 case KVM_S390_VM_MEM_CLR_CMMA:
1739                         ret = sclp.has_cmma ? 0 : -ENXIO;
1740                         break;
1741                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1742                         ret = 0;
1743                         break;
1744                 default:
1745                         ret = -ENXIO;
1746                         break;
1747                 }
1748                 break;
1749         case KVM_S390_VM_TOD:
1750                 switch (attr->attr) {
1751                 case KVM_S390_VM_TOD_LOW:
1752                 case KVM_S390_VM_TOD_HIGH:
1753                         ret = 0;
1754                         break;
1755                 default:
1756                         ret = -ENXIO;
1757                         break;
1758                 }
1759                 break;
1760         case KVM_S390_VM_CPU_MODEL:
1761                 switch (attr->attr) {
1762                 case KVM_S390_VM_CPU_PROCESSOR:
1763                 case KVM_S390_VM_CPU_MACHINE:
1764                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1765                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1766                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1767                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1768                         ret = 0;
1769                         break;
1770                 default:
1771                         ret = -ENXIO;
1772                         break;
1773                 }
1774                 break;
1775         case KVM_S390_VM_CRYPTO:
1776                 switch (attr->attr) {
1777                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1778                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1779                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1780                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1781                         ret = 0;
1782                         break;
1783                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1784                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1785                         ret = ap_instructions_available() ? 0 : -ENXIO;
1786                         break;
1787                 default:
1788                         ret = -ENXIO;
1789                         break;
1790                 }
1791                 break;
1792         case KVM_S390_VM_MIGRATION:
1793                 ret = 0;
1794                 break;
1795         default:
1796                 ret = -ENXIO;
1797                 break;
1798         }
1799
1800         return ret;
1801 }
1802
1803 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1804 {
1805         uint8_t *keys;
1806         uint64_t hva;
1807         int srcu_idx, i, r = 0;
1808
1809         if (args->flags != 0)
1810                 return -EINVAL;
1811
1812         /* Is this guest using storage keys? */
1813         if (!mm_uses_skeys(current->mm))
1814                 return KVM_S390_GET_SKEYS_NONE;
1815
1816         /* Enforce sane limit on memory allocation */
1817         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1818                 return -EINVAL;
1819
1820         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1821         if (!keys)
1822                 return -ENOMEM;
1823
1824         mmap_read_lock(current->mm);
1825         srcu_idx = srcu_read_lock(&kvm->srcu);
1826         for (i = 0; i < args->count; i++) {
1827                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1828                 if (kvm_is_error_hva(hva)) {
1829                         r = -EFAULT;
1830                         break;
1831                 }
1832
1833                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1834                 if (r)
1835                         break;
1836         }
1837         srcu_read_unlock(&kvm->srcu, srcu_idx);
1838         mmap_read_unlock(current->mm);
1839
1840         if (!r) {
1841                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1842                                  sizeof(uint8_t) * args->count);
1843                 if (r)
1844                         r = -EFAULT;
1845         }
1846
1847         kvfree(keys);
1848         return r;
1849 }
1850
1851 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1852 {
1853         uint8_t *keys;
1854         uint64_t hva;
1855         int srcu_idx, i, r = 0;
1856         bool unlocked;
1857
1858         if (args->flags != 0)
1859                 return -EINVAL;
1860
1861         /* Enforce sane limit on memory allocation */
1862         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1863                 return -EINVAL;
1864
1865         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1866         if (!keys)
1867                 return -ENOMEM;
1868
1869         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1870                            sizeof(uint8_t) * args->count);
1871         if (r) {
1872                 r = -EFAULT;
1873                 goto out;
1874         }
1875
1876         /* Enable storage key handling for the guest */
1877         r = s390_enable_skey();
1878         if (r)
1879                 goto out;
1880
1881         i = 0;
1882         mmap_read_lock(current->mm);
1883         srcu_idx = srcu_read_lock(&kvm->srcu);
1884         while (i < args->count) {
1885                 unlocked = false;
1886                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1887                 if (kvm_is_error_hva(hva)) {
1888                         r = -EFAULT;
1889                         break;
1890                 }
1891
1892                 /* Lowest order bit is reserved */
1893                 if (keys[i] & 0x01) {
1894                         r = -EINVAL;
1895                         break;
1896                 }
1897
1898                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1899                 if (r) {
1900                         r = fixup_user_fault(current->mm, hva,
1901                                              FAULT_FLAG_WRITE, &unlocked);
1902                         if (r)
1903                                 break;
1904                 }
1905                 if (!r)
1906                         i++;
1907         }
1908         srcu_read_unlock(&kvm->srcu, srcu_idx);
1909         mmap_read_unlock(current->mm);
1910 out:
1911         kvfree(keys);
1912         return r;
1913 }
1914
1915 /*
1916  * Base address and length must be sent at the start of each block, therefore
1917  * it's cheaper to send some clean data, as long as it's less than the size of
1918  * two longs.
1919  */
1920 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1921 /* for consistency */
1922 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1923
1924 /*
1925  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1926  * address falls in a hole. In that case the index of one of the memslots
1927  * bordering the hole is returned.
1928  */
1929 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1930 {
1931         int start = 0, end = slots->used_slots;
1932         int slot = atomic_read(&slots->lru_slot);
1933         struct kvm_memory_slot *memslots = slots->memslots;
1934
1935         if (gfn >= memslots[slot].base_gfn &&
1936             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1937                 return slot;
1938
1939         while (start < end) {
1940                 slot = start + (end - start) / 2;
1941
1942                 if (gfn >= memslots[slot].base_gfn)
1943                         end = slot;
1944                 else
1945                         start = slot + 1;
1946         }
1947
1948         if (start >= slots->used_slots)
1949                 return slots->used_slots - 1;
1950
1951         if (gfn >= memslots[start].base_gfn &&
1952             gfn < memslots[start].base_gfn + memslots[start].npages) {
1953                 atomic_set(&slots->lru_slot, start);
1954         }
1955
1956         return start;
1957 }
1958
1959 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1960                               u8 *res, unsigned long bufsize)
1961 {
1962         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1963
1964         args->count = 0;
1965         while (args->count < bufsize) {
1966                 hva = gfn_to_hva(kvm, cur_gfn);
1967                 /*
1968                  * We return an error if the first value was invalid, but we
1969                  * return successfully if at least one value was copied.
1970                  */
1971                 if (kvm_is_error_hva(hva))
1972                         return args->count ? 0 : -EFAULT;
1973                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1974                         pgstev = 0;
1975                 res[args->count++] = (pgstev >> 24) & 0x43;
1976                 cur_gfn++;
1977         }
1978
1979         return 0;
1980 }
1981
1982 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1983                                               unsigned long cur_gfn)
1984 {
1985         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
1986         struct kvm_memory_slot *ms = slots->memslots + slotidx;
1987         unsigned long ofs = cur_gfn - ms->base_gfn;
1988
1989         if (ms->base_gfn + ms->npages <= cur_gfn) {
1990                 slotidx--;
1991                 /* If we are above the highest slot, wrap around */
1992                 if (slotidx < 0)
1993                         slotidx = slots->used_slots - 1;
1994
1995                 ms = slots->memslots + slotidx;
1996                 ofs = 0;
1997         }
1998         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1999         while ((slotidx > 0) && (ofs >= ms->npages)) {
2000                 slotidx--;
2001                 ms = slots->memslots + slotidx;
2002                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2003         }
2004         return ms->base_gfn + ofs;
2005 }
2006
2007 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2008                              u8 *res, unsigned long bufsize)
2009 {
2010         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2011         struct kvm_memslots *slots = kvm_memslots(kvm);
2012         struct kvm_memory_slot *ms;
2013
2014         if (unlikely(!slots->used_slots))
2015                 return 0;
2016
2017         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2018         ms = gfn_to_memslot(kvm, cur_gfn);
2019         args->count = 0;
2020         args->start_gfn = cur_gfn;
2021         if (!ms)
2022                 return 0;
2023         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2024         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2025
2026         while (args->count < bufsize) {
2027                 hva = gfn_to_hva(kvm, cur_gfn);
2028                 if (kvm_is_error_hva(hva))
2029                         return 0;
2030                 /* Decrement only if we actually flipped the bit to 0 */
2031                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2032                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2033                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2034                         pgstev = 0;
2035                 /* Save the value */
2036                 res[args->count++] = (pgstev >> 24) & 0x43;
2037                 /* If the next bit is too far away, stop. */
2038                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2039                         return 0;
2040                 /* If we reached the previous "next", find the next one */
2041                 if (cur_gfn == next_gfn)
2042                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2043                 /* Reached the end of memory or of the buffer, stop */
2044                 if ((next_gfn >= mem_end) ||
2045                     (next_gfn - args->start_gfn >= bufsize))
2046                         return 0;
2047                 cur_gfn++;
2048                 /* Reached the end of the current memslot, take the next one. */
2049                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2050                         ms = gfn_to_memslot(kvm, cur_gfn);
2051                         if (!ms)
2052                                 return 0;
2053                 }
2054         }
2055         return 0;
2056 }
2057
2058 /*
2059  * This function searches for the next page with dirty CMMA attributes, and
2060  * saves the attributes in the buffer up to either the end of the buffer or
2061  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2062  * no trailing clean bytes are saved.
2063  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2064  * output buffer will indicate 0 as length.
2065  */
2066 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2067                                   struct kvm_s390_cmma_log *args)
2068 {
2069         unsigned long bufsize;
2070         int srcu_idx, peek, ret;
2071         u8 *values;
2072
2073         if (!kvm->arch.use_cmma)
2074                 return -ENXIO;
2075         /* Invalid/unsupported flags were specified */
2076         if (args->flags & ~KVM_S390_CMMA_PEEK)
2077                 return -EINVAL;
2078         /* Migration mode query, and we are not doing a migration */
2079         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2080         if (!peek && !kvm->arch.migration_mode)
2081                 return -EINVAL;
2082         /* CMMA is disabled or was not used, or the buffer has length zero */
2083         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2084         if (!bufsize || !kvm->mm->context.uses_cmm) {
2085                 memset(args, 0, sizeof(*args));
2086                 return 0;
2087         }
2088         /* We are not peeking, and there are no dirty pages */
2089         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2090                 memset(args, 0, sizeof(*args));
2091                 return 0;
2092         }
2093
2094         values = vmalloc(bufsize);
2095         if (!values)
2096                 return -ENOMEM;
2097
2098         mmap_read_lock(kvm->mm);
2099         srcu_idx = srcu_read_lock(&kvm->srcu);
2100         if (peek)
2101                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2102         else
2103                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2104         srcu_read_unlock(&kvm->srcu, srcu_idx);
2105         mmap_read_unlock(kvm->mm);
2106
2107         if (kvm->arch.migration_mode)
2108                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2109         else
2110                 args->remaining = 0;
2111
2112         if (copy_to_user((void __user *)args->values, values, args->count))
2113                 ret = -EFAULT;
2114
2115         vfree(values);
2116         return ret;
2117 }
2118
2119 /*
2120  * This function sets the CMMA attributes for the given pages. If the input
2121  * buffer has zero length, no action is taken, otherwise the attributes are
2122  * set and the mm->context.uses_cmm flag is set.
2123  */
2124 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2125                                   const struct kvm_s390_cmma_log *args)
2126 {
2127         unsigned long hva, mask, pgstev, i;
2128         uint8_t *bits;
2129         int srcu_idx, r = 0;
2130
2131         mask = args->mask;
2132
2133         if (!kvm->arch.use_cmma)
2134                 return -ENXIO;
2135         /* invalid/unsupported flags */
2136         if (args->flags != 0)
2137                 return -EINVAL;
2138         /* Enforce sane limit on memory allocation */
2139         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2140                 return -EINVAL;
2141         /* Nothing to do */
2142         if (args->count == 0)
2143                 return 0;
2144
2145         bits = vmalloc(array_size(sizeof(*bits), args->count));
2146         if (!bits)
2147                 return -ENOMEM;
2148
2149         r = copy_from_user(bits, (void __user *)args->values, args->count);
2150         if (r) {
2151                 r = -EFAULT;
2152                 goto out;
2153         }
2154
2155         mmap_read_lock(kvm->mm);
2156         srcu_idx = srcu_read_lock(&kvm->srcu);
2157         for (i = 0; i < args->count; i++) {
2158                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2159                 if (kvm_is_error_hva(hva)) {
2160                         r = -EFAULT;
2161                         break;
2162                 }
2163
2164                 pgstev = bits[i];
2165                 pgstev = pgstev << 24;
2166                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2167                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2168         }
2169         srcu_read_unlock(&kvm->srcu, srcu_idx);
2170         mmap_read_unlock(kvm->mm);
2171
2172         if (!kvm->mm->context.uses_cmm) {
2173                 mmap_write_lock(kvm->mm);
2174                 kvm->mm->context.uses_cmm = 1;
2175                 mmap_write_unlock(kvm->mm);
2176         }
2177 out:
2178         vfree(bits);
2179         return r;
2180 }
2181
2182 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2183 {
2184         struct kvm_vcpu *vcpu;
2185         u16 rc, rrc;
2186         int ret = 0;
2187         int i;
2188
2189         /*
2190          * We ignore failures and try to destroy as many CPUs as possible.
2191          * At the same time we must not free the assigned resources when
2192          * this fails, as the ultravisor has still access to that memory.
2193          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2194          * behind.
2195          * We want to return the first failure rc and rrc, though.
2196          */
2197         kvm_for_each_vcpu(i, vcpu, kvm) {
2198                 mutex_lock(&vcpu->mutex);
2199                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2200                         *rcp = rc;
2201                         *rrcp = rrc;
2202                         ret = -EIO;
2203                 }
2204                 mutex_unlock(&vcpu->mutex);
2205         }
2206         return ret;
2207 }
2208
2209 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2210 {
2211         int i, r = 0;
2212         u16 dummy;
2213
2214         struct kvm_vcpu *vcpu;
2215
2216         kvm_for_each_vcpu(i, vcpu, kvm) {
2217                 mutex_lock(&vcpu->mutex);
2218                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2219                 mutex_unlock(&vcpu->mutex);
2220                 if (r)
2221                         break;
2222         }
2223         if (r)
2224                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2225         return r;
2226 }
2227
2228 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2229 {
2230         int r = 0;
2231         u16 dummy;
2232         void __user *argp = (void __user *)cmd->data;
2233
2234         switch (cmd->cmd) {
2235         case KVM_PV_ENABLE: {
2236                 r = -EINVAL;
2237                 if (kvm_s390_pv_is_protected(kvm))
2238                         break;
2239
2240                 /*
2241                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2242                  *  esca, we need no cleanup in the error cases below
2243                  */
2244                 r = sca_switch_to_extended(kvm);
2245                 if (r)
2246                         break;
2247
2248                 mmap_write_lock(current->mm);
2249                 r = gmap_mark_unmergeable();
2250                 mmap_write_unlock(current->mm);
2251                 if (r)
2252                         break;
2253
2254                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2255                 if (r)
2256                         break;
2257
2258                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2259                 if (r)
2260                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2261
2262                 /* we need to block service interrupts from now on */
2263                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2264                 break;
2265         }
2266         case KVM_PV_DISABLE: {
2267                 r = -EINVAL;
2268                 if (!kvm_s390_pv_is_protected(kvm))
2269                         break;
2270
2271                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2272                 /*
2273                  * If a CPU could not be destroyed, destroy VM will also fail.
2274                  * There is no point in trying to destroy it. Instead return
2275                  * the rc and rrc from the first CPU that failed destroying.
2276                  */
2277                 if (r)
2278                         break;
2279                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2280
2281                 /* no need to block service interrupts any more */
2282                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2283                 break;
2284         }
2285         case KVM_PV_SET_SEC_PARMS: {
2286                 struct kvm_s390_pv_sec_parm parms = {};
2287                 void *hdr;
2288
2289                 r = -EINVAL;
2290                 if (!kvm_s390_pv_is_protected(kvm))
2291                         break;
2292
2293                 r = -EFAULT;
2294                 if (copy_from_user(&parms, argp, sizeof(parms)))
2295                         break;
2296
2297                 /* Currently restricted to 8KB */
2298                 r = -EINVAL;
2299                 if (parms.length > PAGE_SIZE * 2)
2300                         break;
2301
2302                 r = -ENOMEM;
2303                 hdr = vmalloc(parms.length);
2304                 if (!hdr)
2305                         break;
2306
2307                 r = -EFAULT;
2308                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2309                                     parms.length))
2310                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2311                                                       &cmd->rc, &cmd->rrc);
2312
2313                 vfree(hdr);
2314                 break;
2315         }
2316         case KVM_PV_UNPACK: {
2317                 struct kvm_s390_pv_unp unp = {};
2318
2319                 r = -EINVAL;
2320                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2321                         break;
2322
2323                 r = -EFAULT;
2324                 if (copy_from_user(&unp, argp, sizeof(unp)))
2325                         break;
2326
2327                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2328                                        &cmd->rc, &cmd->rrc);
2329                 break;
2330         }
2331         case KVM_PV_VERIFY: {
2332                 r = -EINVAL;
2333                 if (!kvm_s390_pv_is_protected(kvm))
2334                         break;
2335
2336                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2337                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2338                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2339                              cmd->rrc);
2340                 break;
2341         }
2342         case KVM_PV_PREP_RESET: {
2343                 r = -EINVAL;
2344                 if (!kvm_s390_pv_is_protected(kvm))
2345                         break;
2346
2347                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2348                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2349                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2350                              cmd->rc, cmd->rrc);
2351                 break;
2352         }
2353         case KVM_PV_UNSHARE_ALL: {
2354                 r = -EINVAL;
2355                 if (!kvm_s390_pv_is_protected(kvm))
2356                         break;
2357
2358                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2359                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2360                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2361                              cmd->rc, cmd->rrc);
2362                 break;
2363         }
2364         default:
2365                 r = -ENOTTY;
2366         }
2367         return r;
2368 }
2369
2370 long kvm_arch_vm_ioctl(struct file *filp,
2371                        unsigned int ioctl, unsigned long arg)
2372 {
2373         struct kvm *kvm = filp->private_data;
2374         void __user *argp = (void __user *)arg;
2375         struct kvm_device_attr attr;
2376         int r;
2377
2378         switch (ioctl) {
2379         case KVM_S390_INTERRUPT: {
2380                 struct kvm_s390_interrupt s390int;
2381
2382                 r = -EFAULT;
2383                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2384                         break;
2385                 r = kvm_s390_inject_vm(kvm, &s390int);
2386                 break;
2387         }
2388         case KVM_CREATE_IRQCHIP: {
2389                 struct kvm_irq_routing_entry routing;
2390
2391                 r = -EINVAL;
2392                 if (kvm->arch.use_irqchip) {
2393                         /* Set up dummy routing. */
2394                         memset(&routing, 0, sizeof(routing));
2395                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2396                 }
2397                 break;
2398         }
2399         case KVM_SET_DEVICE_ATTR: {
2400                 r = -EFAULT;
2401                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2402                         break;
2403                 r = kvm_s390_vm_set_attr(kvm, &attr);
2404                 break;
2405         }
2406         case KVM_GET_DEVICE_ATTR: {
2407                 r = -EFAULT;
2408                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2409                         break;
2410                 r = kvm_s390_vm_get_attr(kvm, &attr);
2411                 break;
2412         }
2413         case KVM_HAS_DEVICE_ATTR: {
2414                 r = -EFAULT;
2415                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2416                         break;
2417                 r = kvm_s390_vm_has_attr(kvm, &attr);
2418                 break;
2419         }
2420         case KVM_S390_GET_SKEYS: {
2421                 struct kvm_s390_skeys args;
2422
2423                 r = -EFAULT;
2424                 if (copy_from_user(&args, argp,
2425                                    sizeof(struct kvm_s390_skeys)))
2426                         break;
2427                 r = kvm_s390_get_skeys(kvm, &args);
2428                 break;
2429         }
2430         case KVM_S390_SET_SKEYS: {
2431                 struct kvm_s390_skeys args;
2432
2433                 r = -EFAULT;
2434                 if (copy_from_user(&args, argp,
2435                                    sizeof(struct kvm_s390_skeys)))
2436                         break;
2437                 r = kvm_s390_set_skeys(kvm, &args);
2438                 break;
2439         }
2440         case KVM_S390_GET_CMMA_BITS: {
2441                 struct kvm_s390_cmma_log args;
2442
2443                 r = -EFAULT;
2444                 if (copy_from_user(&args, argp, sizeof(args)))
2445                         break;
2446                 mutex_lock(&kvm->slots_lock);
2447                 r = kvm_s390_get_cmma_bits(kvm, &args);
2448                 mutex_unlock(&kvm->slots_lock);
2449                 if (!r) {
2450                         r = copy_to_user(argp, &args, sizeof(args));
2451                         if (r)
2452                                 r = -EFAULT;
2453                 }
2454                 break;
2455         }
2456         case KVM_S390_SET_CMMA_BITS: {
2457                 struct kvm_s390_cmma_log args;
2458
2459                 r = -EFAULT;
2460                 if (copy_from_user(&args, argp, sizeof(args)))
2461                         break;
2462                 mutex_lock(&kvm->slots_lock);
2463                 r = kvm_s390_set_cmma_bits(kvm, &args);
2464                 mutex_unlock(&kvm->slots_lock);
2465                 break;
2466         }
2467         case KVM_S390_PV_COMMAND: {
2468                 struct kvm_pv_cmd args;
2469
2470                 /* protvirt means user sigp */
2471                 kvm->arch.user_cpu_state_ctrl = 1;
2472                 r = 0;
2473                 if (!is_prot_virt_host()) {
2474                         r = -EINVAL;
2475                         break;
2476                 }
2477                 if (copy_from_user(&args, argp, sizeof(args))) {
2478                         r = -EFAULT;
2479                         break;
2480                 }
2481                 if (args.flags) {
2482                         r = -EINVAL;
2483                         break;
2484                 }
2485                 mutex_lock(&kvm->lock);
2486                 r = kvm_s390_handle_pv(kvm, &args);
2487                 mutex_unlock(&kvm->lock);
2488                 if (copy_to_user(argp, &args, sizeof(args))) {
2489                         r = -EFAULT;
2490                         break;
2491                 }
2492                 break;
2493         }
2494         default:
2495                 r = -ENOTTY;
2496         }
2497
2498         return r;
2499 }
2500
2501 static int kvm_s390_apxa_installed(void)
2502 {
2503         struct ap_config_info info;
2504
2505         if (ap_instructions_available()) {
2506                 if (ap_qci(&info) == 0)
2507                         return info.apxa;
2508         }
2509
2510         return 0;
2511 }
2512
2513 /*
2514  * The format of the crypto control block (CRYCB) is specified in the 3 low
2515  * order bits of the CRYCB designation (CRYCBD) field as follows:
2516  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2517  *           AP extended addressing (APXA) facility are installed.
2518  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2519  * Format 2: Both the APXA and MSAX3 facilities are installed
2520  */
2521 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2522 {
2523         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2524
2525         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2526         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2527
2528         /* Check whether MSAX3 is installed */
2529         if (!test_kvm_facility(kvm, 76))
2530                 return;
2531
2532         if (kvm_s390_apxa_installed())
2533                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2534         else
2535                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2536 }
2537
2538 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2539                                unsigned long *aqm, unsigned long *adm)
2540 {
2541         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2542
2543         mutex_lock(&kvm->lock);
2544         kvm_s390_vcpu_block_all(kvm);
2545
2546         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2547         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2548                 memcpy(crycb->apcb1.apm, apm, 32);
2549                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2550                          apm[0], apm[1], apm[2], apm[3]);
2551                 memcpy(crycb->apcb1.aqm, aqm, 32);
2552                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2553                          aqm[0], aqm[1], aqm[2], aqm[3]);
2554                 memcpy(crycb->apcb1.adm, adm, 32);
2555                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2556                          adm[0], adm[1], adm[2], adm[3]);
2557                 break;
2558         case CRYCB_FORMAT1:
2559         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2560                 memcpy(crycb->apcb0.apm, apm, 8);
2561                 memcpy(crycb->apcb0.aqm, aqm, 2);
2562                 memcpy(crycb->apcb0.adm, adm, 2);
2563                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2564                          apm[0], *((unsigned short *)aqm),
2565                          *((unsigned short *)adm));
2566                 break;
2567         default:        /* Can not happen */
2568                 break;
2569         }
2570
2571         /* recreate the shadow crycb for each vcpu */
2572         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2573         kvm_s390_vcpu_unblock_all(kvm);
2574         mutex_unlock(&kvm->lock);
2575 }
2576 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2577
2578 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2579 {
2580         mutex_lock(&kvm->lock);
2581         kvm_s390_vcpu_block_all(kvm);
2582
2583         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2584                sizeof(kvm->arch.crypto.crycb->apcb0));
2585         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2586                sizeof(kvm->arch.crypto.crycb->apcb1));
2587
2588         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2589         /* recreate the shadow crycb for each vcpu */
2590         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2591         kvm_s390_vcpu_unblock_all(kvm);
2592         mutex_unlock(&kvm->lock);
2593 }
2594 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2595
2596 static u64 kvm_s390_get_initial_cpuid(void)
2597 {
2598         struct cpuid cpuid;
2599
2600         get_cpu_id(&cpuid);
2601         cpuid.version = 0xff;
2602         return *((u64 *) &cpuid);
2603 }
2604
2605 static void kvm_s390_crypto_init(struct kvm *kvm)
2606 {
2607         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2608         kvm_s390_set_crycb_format(kvm);
2609
2610         if (!test_kvm_facility(kvm, 76))
2611                 return;
2612
2613         /* Enable AES/DEA protected key functions by default */
2614         kvm->arch.crypto.aes_kw = 1;
2615         kvm->arch.crypto.dea_kw = 1;
2616         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2617                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2618         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2619                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2620 }
2621
2622 static void sca_dispose(struct kvm *kvm)
2623 {
2624         if (kvm->arch.use_esca)
2625                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2626         else
2627                 free_page((unsigned long)(kvm->arch.sca));
2628         kvm->arch.sca = NULL;
2629 }
2630
2631 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2632 {
2633         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2634         int i, rc;
2635         char debug_name[16];
2636         static unsigned long sca_offset;
2637
2638         rc = -EINVAL;
2639 #ifdef CONFIG_KVM_S390_UCONTROL
2640         if (type & ~KVM_VM_S390_UCONTROL)
2641                 goto out_err;
2642         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2643                 goto out_err;
2644 #else
2645         if (type)
2646                 goto out_err;
2647 #endif
2648
2649         rc = s390_enable_sie();
2650         if (rc)
2651                 goto out_err;
2652
2653         rc = -ENOMEM;
2654
2655         if (!sclp.has_64bscao)
2656                 alloc_flags |= GFP_DMA;
2657         rwlock_init(&kvm->arch.sca_lock);
2658         /* start with basic SCA */
2659         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2660         if (!kvm->arch.sca)
2661                 goto out_err;
2662         mutex_lock(&kvm_lock);
2663         sca_offset += 16;
2664         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2665                 sca_offset = 0;
2666         kvm->arch.sca = (struct bsca_block *)
2667                         ((char *) kvm->arch.sca + sca_offset);
2668         mutex_unlock(&kvm_lock);
2669
2670         sprintf(debug_name, "kvm-%u", current->pid);
2671
2672         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2673         if (!kvm->arch.dbf)
2674                 goto out_err;
2675
2676         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2677         kvm->arch.sie_page2 =
2678              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2679         if (!kvm->arch.sie_page2)
2680                 goto out_err;
2681
2682         kvm->arch.sie_page2->kvm = kvm;
2683         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2684
2685         for (i = 0; i < kvm_s390_fac_size(); i++) {
2686                 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2687                                               (kvm_s390_fac_base[i] |
2688                                                kvm_s390_fac_ext[i]);
2689                 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2690                                               kvm_s390_fac_base[i];
2691         }
2692         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2693
2694         /* we are always in czam mode - even on pre z14 machines */
2695         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2696         set_kvm_facility(kvm->arch.model.fac_list, 138);
2697         /* we emulate STHYI in kvm */
2698         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2699         set_kvm_facility(kvm->arch.model.fac_list, 74);
2700         if (MACHINE_HAS_TLB_GUEST) {
2701                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2702                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2703         }
2704
2705         if (css_general_characteristics.aiv && test_facility(65))
2706                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2707
2708         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2709         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2710
2711         kvm_s390_crypto_init(kvm);
2712
2713         mutex_init(&kvm->arch.float_int.ais_lock);
2714         spin_lock_init(&kvm->arch.float_int.lock);
2715         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2716                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2717         init_waitqueue_head(&kvm->arch.ipte_wq);
2718         mutex_init(&kvm->arch.ipte_mutex);
2719
2720         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2721         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2722
2723         if (type & KVM_VM_S390_UCONTROL) {
2724                 kvm->arch.gmap = NULL;
2725                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2726         } else {
2727                 if (sclp.hamax == U64_MAX)
2728                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2729                 else
2730                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2731                                                     sclp.hamax + 1);
2732                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2733                 if (!kvm->arch.gmap)
2734                         goto out_err;
2735                 kvm->arch.gmap->private = kvm;
2736                 kvm->arch.gmap->pfault_enabled = 0;
2737         }
2738
2739         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2740         kvm->arch.use_skf = sclp.has_skey;
2741         spin_lock_init(&kvm->arch.start_stop_lock);
2742         kvm_s390_vsie_init(kvm);
2743         if (use_gisa)
2744                 kvm_s390_gisa_init(kvm);
2745         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2746
2747         return 0;
2748 out_err:
2749         free_page((unsigned long)kvm->arch.sie_page2);
2750         debug_unregister(kvm->arch.dbf);
2751         sca_dispose(kvm);
2752         KVM_EVENT(3, "creation of vm failed: %d", rc);
2753         return rc;
2754 }
2755
2756 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2757 {
2758         u16 rc, rrc;
2759
2760         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2761         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2762         kvm_s390_clear_local_irqs(vcpu);
2763         kvm_clear_async_pf_completion_queue(vcpu);
2764         if (!kvm_is_ucontrol(vcpu->kvm))
2765                 sca_del_vcpu(vcpu);
2766
2767         if (kvm_is_ucontrol(vcpu->kvm))
2768                 gmap_remove(vcpu->arch.gmap);
2769
2770         if (vcpu->kvm->arch.use_cmma)
2771                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2772         /* We can not hold the vcpu mutex here, we are already dying */
2773         if (kvm_s390_pv_cpu_get_handle(vcpu))
2774                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2775         free_page((unsigned long)(vcpu->arch.sie_block));
2776 }
2777
2778 static void kvm_free_vcpus(struct kvm *kvm)
2779 {
2780         unsigned int i;
2781         struct kvm_vcpu *vcpu;
2782
2783         kvm_for_each_vcpu(i, vcpu, kvm)
2784                 kvm_vcpu_destroy(vcpu);
2785
2786         mutex_lock(&kvm->lock);
2787         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2788                 kvm->vcpus[i] = NULL;
2789
2790         atomic_set(&kvm->online_vcpus, 0);
2791         mutex_unlock(&kvm->lock);
2792 }
2793
2794 void kvm_arch_destroy_vm(struct kvm *kvm)
2795 {
2796         u16 rc, rrc;
2797
2798         kvm_free_vcpus(kvm);
2799         sca_dispose(kvm);
2800         kvm_s390_gisa_destroy(kvm);
2801         /*
2802          * We are already at the end of life and kvm->lock is not taken.
2803          * This is ok as the file descriptor is closed by now and nobody
2804          * can mess with the pv state. To avoid lockdep_assert_held from
2805          * complaining we do not use kvm_s390_pv_is_protected.
2806          */
2807         if (kvm_s390_pv_get_handle(kvm))
2808                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2809         debug_unregister(kvm->arch.dbf);
2810         free_page((unsigned long)kvm->arch.sie_page2);
2811         if (!kvm_is_ucontrol(kvm))
2812                 gmap_remove(kvm->arch.gmap);
2813         kvm_s390_destroy_adapters(kvm);
2814         kvm_s390_clear_float_irqs(kvm);
2815         kvm_s390_vsie_destroy(kvm);
2816         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2817 }
2818
2819 /* Section: vcpu related */
2820 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2821 {
2822         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2823         if (!vcpu->arch.gmap)
2824                 return -ENOMEM;
2825         vcpu->arch.gmap->private = vcpu->kvm;
2826
2827         return 0;
2828 }
2829
2830 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2831 {
2832         if (!kvm_s390_use_sca_entries())
2833                 return;
2834         read_lock(&vcpu->kvm->arch.sca_lock);
2835         if (vcpu->kvm->arch.use_esca) {
2836                 struct esca_block *sca = vcpu->kvm->arch.sca;
2837
2838                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2839                 sca->cpu[vcpu->vcpu_id].sda = 0;
2840         } else {
2841                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2842
2843                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2844                 sca->cpu[vcpu->vcpu_id].sda = 0;
2845         }
2846         read_unlock(&vcpu->kvm->arch.sca_lock);
2847 }
2848
2849 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2850 {
2851         if (!kvm_s390_use_sca_entries()) {
2852                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2853
2854                 /* we still need the basic sca for the ipte control */
2855                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2857                 return;
2858         }
2859         read_lock(&vcpu->kvm->arch.sca_lock);
2860         if (vcpu->kvm->arch.use_esca) {
2861                 struct esca_block *sca = vcpu->kvm->arch.sca;
2862
2863                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2864                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2865                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2866                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2867                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2868         } else {
2869                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2870
2871                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2872                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2873                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2874                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2875         }
2876         read_unlock(&vcpu->kvm->arch.sca_lock);
2877 }
2878
2879 /* Basic SCA to Extended SCA data copy routines */
2880 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2881 {
2882         d->sda = s->sda;
2883         d->sigp_ctrl.c = s->sigp_ctrl.c;
2884         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2885 }
2886
2887 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2888 {
2889         int i;
2890
2891         d->ipte_control = s->ipte_control;
2892         d->mcn[0] = s->mcn;
2893         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2894                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2895 }
2896
2897 static int sca_switch_to_extended(struct kvm *kvm)
2898 {
2899         struct bsca_block *old_sca = kvm->arch.sca;
2900         struct esca_block *new_sca;
2901         struct kvm_vcpu *vcpu;
2902         unsigned int vcpu_idx;
2903         u32 scaol, scaoh;
2904
2905         if (kvm->arch.use_esca)
2906                 return 0;
2907
2908         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2909         if (!new_sca)
2910                 return -ENOMEM;
2911
2912         scaoh = (u32)((u64)(new_sca) >> 32);
2913         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2914
2915         kvm_s390_vcpu_block_all(kvm);
2916         write_lock(&kvm->arch.sca_lock);
2917
2918         sca_copy_b_to_e(new_sca, old_sca);
2919
2920         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2921                 vcpu->arch.sie_block->scaoh = scaoh;
2922                 vcpu->arch.sie_block->scaol = scaol;
2923                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2924         }
2925         kvm->arch.sca = new_sca;
2926         kvm->arch.use_esca = 1;
2927
2928         write_unlock(&kvm->arch.sca_lock);
2929         kvm_s390_vcpu_unblock_all(kvm);
2930
2931         free_page((unsigned long)old_sca);
2932
2933         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2934                  old_sca, kvm->arch.sca);
2935         return 0;
2936 }
2937
2938 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2939 {
2940         int rc;
2941
2942         if (!kvm_s390_use_sca_entries()) {
2943                 if (id < KVM_MAX_VCPUS)
2944                         return true;
2945                 return false;
2946         }
2947         if (id < KVM_S390_BSCA_CPU_SLOTS)
2948                 return true;
2949         if (!sclp.has_esca || !sclp.has_64bscao)
2950                 return false;
2951
2952         mutex_lock(&kvm->lock);
2953         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2954         mutex_unlock(&kvm->lock);
2955
2956         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2957 }
2958
2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2960 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2961 {
2962         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2963         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2964         vcpu->arch.cputm_start = get_tod_clock_fast();
2965         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2966 }
2967
2968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2969 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2970 {
2971         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2972         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2973         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2974         vcpu->arch.cputm_start = 0;
2975         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2976 }
2977
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2979 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2980 {
2981         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2982         vcpu->arch.cputm_enabled = true;
2983         __start_cpu_timer_accounting(vcpu);
2984 }
2985
2986 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2987 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2988 {
2989         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2990         __stop_cpu_timer_accounting(vcpu);
2991         vcpu->arch.cputm_enabled = false;
2992 }
2993
2994 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2995 {
2996         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2997         __enable_cpu_timer_accounting(vcpu);
2998         preempt_enable();
2999 }
3000
3001 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3002 {
3003         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3004         __disable_cpu_timer_accounting(vcpu);
3005         preempt_enable();
3006 }
3007
3008 /* set the cpu timer - may only be called from the VCPU thread itself */
3009 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3010 {
3011         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3012         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3013         if (vcpu->arch.cputm_enabled)
3014                 vcpu->arch.cputm_start = get_tod_clock_fast();
3015         vcpu->arch.sie_block->cputm = cputm;
3016         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3017         preempt_enable();
3018 }
3019
3020 /* update and get the cpu timer - can also be called from other VCPU threads */
3021 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3022 {
3023         unsigned int seq;
3024         __u64 value;
3025
3026         if (unlikely(!vcpu->arch.cputm_enabled))
3027                 return vcpu->arch.sie_block->cputm;
3028
3029         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3030         do {
3031                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3032                 /*
3033                  * If the writer would ever execute a read in the critical
3034                  * section, e.g. in irq context, we have a deadlock.
3035                  */
3036                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3037                 value = vcpu->arch.sie_block->cputm;
3038                 /* if cputm_start is 0, accounting is being started/stopped */
3039                 if (likely(vcpu->arch.cputm_start))
3040                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3041         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3042         preempt_enable();
3043         return value;
3044 }
3045
3046 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3047 {
3048
3049         gmap_enable(vcpu->arch.enabled_gmap);
3050         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3051         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3052                 __start_cpu_timer_accounting(vcpu);
3053         vcpu->cpu = cpu;
3054 }
3055
3056 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3057 {
3058         vcpu->cpu = -1;
3059         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3060                 __stop_cpu_timer_accounting(vcpu);
3061         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3062         vcpu->arch.enabled_gmap = gmap_get_enabled();
3063         gmap_disable(vcpu->arch.enabled_gmap);
3064
3065 }
3066
3067 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3068 {
3069         mutex_lock(&vcpu->kvm->lock);
3070         preempt_disable();
3071         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3072         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3073         preempt_enable();
3074         mutex_unlock(&vcpu->kvm->lock);
3075         if (!kvm_is_ucontrol(vcpu->kvm)) {
3076                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3077                 sca_add_vcpu(vcpu);
3078         }
3079         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3080                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3081         /* make vcpu_load load the right gmap on the first trigger */
3082         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3083 }
3084
3085 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3086 {
3087         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3088             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3089                 return true;
3090         return false;
3091 }
3092
3093 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3094 {
3095         /* At least one ECC subfunction must be present */
3096         return kvm_has_pckmo_subfunc(kvm, 32) ||
3097                kvm_has_pckmo_subfunc(kvm, 33) ||
3098                kvm_has_pckmo_subfunc(kvm, 34) ||
3099                kvm_has_pckmo_subfunc(kvm, 40) ||
3100                kvm_has_pckmo_subfunc(kvm, 41);
3101
3102 }
3103
3104 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3105 {
3106         /*
3107          * If the AP instructions are not being interpreted and the MSAX3
3108          * facility is not configured for the guest, there is nothing to set up.
3109          */
3110         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3111                 return;
3112
3113         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3114         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3115         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3116         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3117
3118         if (vcpu->kvm->arch.crypto.apie)
3119                 vcpu->arch.sie_block->eca |= ECA_APIE;
3120
3121         /* Set up protected key support */
3122         if (vcpu->kvm->arch.crypto.aes_kw) {
3123                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3124                 /* ecc is also wrapped with AES key */
3125                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3126                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3127         }
3128
3129         if (vcpu->kvm->arch.crypto.dea_kw)
3130                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3131 }
3132
3133 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3134 {
3135         free_page(vcpu->arch.sie_block->cbrlo);
3136         vcpu->arch.sie_block->cbrlo = 0;
3137 }
3138
3139 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3140 {
3141         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3142         if (!vcpu->arch.sie_block->cbrlo)
3143                 return -ENOMEM;
3144         return 0;
3145 }
3146
3147 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3148 {
3149         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3150
3151         vcpu->arch.sie_block->ibc = model->ibc;
3152         if (test_kvm_facility(vcpu->kvm, 7))
3153                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3154 }
3155
3156 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3157 {
3158         int rc = 0;
3159         u16 uvrc, uvrrc;
3160
3161         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3162                                                     CPUSTAT_SM |
3163                                                     CPUSTAT_STOPPED);
3164
3165         if (test_kvm_facility(vcpu->kvm, 78))
3166                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3167         else if (test_kvm_facility(vcpu->kvm, 8))
3168                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3169
3170         kvm_s390_vcpu_setup_model(vcpu);
3171
3172         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3173         if (MACHINE_HAS_ESOP)
3174                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3175         if (test_kvm_facility(vcpu->kvm, 9))
3176                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3177         if (test_kvm_facility(vcpu->kvm, 73))
3178                 vcpu->arch.sie_block->ecb |= ECB_TE;
3179
3180         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3181                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3182         if (test_kvm_facility(vcpu->kvm, 130))
3183                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3184         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3185         if (sclp.has_cei)
3186                 vcpu->arch.sie_block->eca |= ECA_CEI;
3187         if (sclp.has_ib)
3188                 vcpu->arch.sie_block->eca |= ECA_IB;
3189         if (sclp.has_siif)
3190                 vcpu->arch.sie_block->eca |= ECA_SII;
3191         if (sclp.has_sigpif)
3192                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3193         if (test_kvm_facility(vcpu->kvm, 129)) {
3194                 vcpu->arch.sie_block->eca |= ECA_VX;
3195                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3196         }
3197         if (test_kvm_facility(vcpu->kvm, 139))
3198                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3199         if (test_kvm_facility(vcpu->kvm, 156))
3200                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3201         if (vcpu->arch.sie_block->gd) {
3202                 vcpu->arch.sie_block->eca |= ECA_AIV;
3203                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3204                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3205         }
3206         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3207                                         | SDNXC;
3208         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3209
3210         if (sclp.has_kss)
3211                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3212         else
3213                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3214
3215         if (vcpu->kvm->arch.use_cmma) {
3216                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3217                 if (rc)
3218                         return rc;
3219         }
3220         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3221         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3222
3223         vcpu->arch.sie_block->hpid = HPID_KVM;
3224
3225         kvm_s390_vcpu_crypto_setup(vcpu);
3226
3227         mutex_lock(&vcpu->kvm->lock);
3228         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3229                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3230                 if (rc)
3231                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3232         }
3233         mutex_unlock(&vcpu->kvm->lock);
3234
3235         return rc;
3236 }
3237
3238 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3239 {
3240         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3241                 return -EINVAL;
3242         return 0;
3243 }
3244
3245 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3246 {
3247         struct sie_page *sie_page;
3248         int rc;
3249
3250         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3251         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3252         if (!sie_page)
3253                 return -ENOMEM;
3254
3255         vcpu->arch.sie_block = &sie_page->sie_block;
3256         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3257
3258         /* the real guest size will always be smaller than msl */
3259         vcpu->arch.sie_block->mso = 0;
3260         vcpu->arch.sie_block->msl = sclp.hamax;
3261
3262         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3263         spin_lock_init(&vcpu->arch.local_int.lock);
3264         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3265         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3266                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3267         seqcount_init(&vcpu->arch.cputm_seqcount);
3268
3269         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3270         kvm_clear_async_pf_completion_queue(vcpu);
3271         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3272                                     KVM_SYNC_GPRS |
3273                                     KVM_SYNC_ACRS |
3274                                     KVM_SYNC_CRS |
3275                                     KVM_SYNC_ARCH0 |
3276                                     KVM_SYNC_PFAULT |
3277                                     KVM_SYNC_DIAG318;
3278         kvm_s390_set_prefix(vcpu, 0);
3279         if (test_kvm_facility(vcpu->kvm, 64))
3280                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3281         if (test_kvm_facility(vcpu->kvm, 82))
3282                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3283         if (test_kvm_facility(vcpu->kvm, 133))
3284                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3285         if (test_kvm_facility(vcpu->kvm, 156))
3286                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3287         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3288          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3289          */
3290         if (MACHINE_HAS_VX)
3291                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3292         else
3293                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3294
3295         if (kvm_is_ucontrol(vcpu->kvm)) {
3296                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3297                 if (rc)
3298                         goto out_free_sie_block;
3299         }
3300
3301         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3302                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3303         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3304
3305         rc = kvm_s390_vcpu_setup(vcpu);
3306         if (rc)
3307                 goto out_ucontrol_uninit;
3308         return 0;
3309
3310 out_ucontrol_uninit:
3311         if (kvm_is_ucontrol(vcpu->kvm))
3312                 gmap_remove(vcpu->arch.gmap);
3313 out_free_sie_block:
3314         free_page((unsigned long)(vcpu->arch.sie_block));
3315         return rc;
3316 }
3317
3318 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3319 {
3320         return kvm_s390_vcpu_has_irq(vcpu, 0);
3321 }
3322
3323 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3324 {
3325         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3326 }
3327
3328 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3329 {
3330         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3331         exit_sie(vcpu);
3332 }
3333
3334 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3335 {
3336         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3337 }
3338
3339 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3340 {
3341         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3342         exit_sie(vcpu);
3343 }
3344
3345 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3346 {
3347         return atomic_read(&vcpu->arch.sie_block->prog20) &
3348                (PROG_BLOCK_SIE | PROG_REQUEST);
3349 }
3350
3351 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3352 {
3353         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3354 }
3355
3356 /*
3357  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3358  * If the CPU is not running (e.g. waiting as idle) the function will
3359  * return immediately. */
3360 void exit_sie(struct kvm_vcpu *vcpu)
3361 {
3362         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3363         kvm_s390_vsie_kick(vcpu);
3364         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3365                 cpu_relax();
3366 }
3367
3368 /* Kick a guest cpu out of SIE to process a request synchronously */
3369 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3370 {
3371         kvm_make_request(req, vcpu);
3372         kvm_s390_vcpu_request(vcpu);
3373 }
3374
3375 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3376                               unsigned long end)
3377 {
3378         struct kvm *kvm = gmap->private;
3379         struct kvm_vcpu *vcpu;
3380         unsigned long prefix;
3381         int i;
3382
3383         if (gmap_is_shadow(gmap))
3384                 return;
3385         if (start >= 1UL << 31)
3386                 /* We are only interested in prefix pages */
3387                 return;
3388         kvm_for_each_vcpu(i, vcpu, kvm) {
3389                 /* match against both prefix pages */
3390                 prefix = kvm_s390_get_prefix(vcpu);
3391                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3392                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3393                                    start, end);
3394                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3395                 }
3396         }
3397 }
3398
3399 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3400 {
3401         /* do not poll with more than halt_poll_max_steal percent of steal time */
3402         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3403             halt_poll_max_steal) {
3404                 vcpu->stat.halt_no_poll_steal++;
3405                 return true;
3406         }
3407         return false;
3408 }
3409
3410 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3411 {
3412         /* kvm common code refers to this, but never calls it */
3413         BUG();
3414         return 0;
3415 }
3416
3417 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3418                                            struct kvm_one_reg *reg)
3419 {
3420         int r = -EINVAL;
3421
3422         switch (reg->id) {
3423         case KVM_REG_S390_TODPR:
3424                 r = put_user(vcpu->arch.sie_block->todpr,
3425                              (u32 __user *)reg->addr);
3426                 break;
3427         case KVM_REG_S390_EPOCHDIFF:
3428                 r = put_user(vcpu->arch.sie_block->epoch,
3429                              (u64 __user *)reg->addr);
3430                 break;
3431         case KVM_REG_S390_CPU_TIMER:
3432                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3433                              (u64 __user *)reg->addr);
3434                 break;
3435         case KVM_REG_S390_CLOCK_COMP:
3436                 r = put_user(vcpu->arch.sie_block->ckc,
3437                              (u64 __user *)reg->addr);
3438                 break;
3439         case KVM_REG_S390_PFTOKEN:
3440                 r = put_user(vcpu->arch.pfault_token,
3441                              (u64 __user *)reg->addr);
3442                 break;
3443         case KVM_REG_S390_PFCOMPARE:
3444                 r = put_user(vcpu->arch.pfault_compare,
3445                              (u64 __user *)reg->addr);
3446                 break;
3447         case KVM_REG_S390_PFSELECT:
3448                 r = put_user(vcpu->arch.pfault_select,
3449                              (u64 __user *)reg->addr);
3450                 break;
3451         case KVM_REG_S390_PP:
3452                 r = put_user(vcpu->arch.sie_block->pp,
3453                              (u64 __user *)reg->addr);
3454                 break;
3455         case KVM_REG_S390_GBEA:
3456                 r = put_user(vcpu->arch.sie_block->gbea,
3457                              (u64 __user *)reg->addr);
3458                 break;
3459         default:
3460                 break;
3461         }
3462
3463         return r;
3464 }
3465
3466 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3467                                            struct kvm_one_reg *reg)
3468 {
3469         int r = -EINVAL;
3470         __u64 val;
3471
3472         switch (reg->id) {
3473         case KVM_REG_S390_TODPR:
3474                 r = get_user(vcpu->arch.sie_block->todpr,
3475                              (u32 __user *)reg->addr);
3476                 break;
3477         case KVM_REG_S390_EPOCHDIFF:
3478                 r = get_user(vcpu->arch.sie_block->epoch,
3479                              (u64 __user *)reg->addr);
3480                 break;
3481         case KVM_REG_S390_CPU_TIMER:
3482                 r = get_user(val, (u64 __user *)reg->addr);
3483                 if (!r)
3484                         kvm_s390_set_cpu_timer(vcpu, val);
3485                 break;
3486         case KVM_REG_S390_CLOCK_COMP:
3487                 r = get_user(vcpu->arch.sie_block->ckc,
3488                              (u64 __user *)reg->addr);
3489                 break;
3490         case KVM_REG_S390_PFTOKEN:
3491                 r = get_user(vcpu->arch.pfault_token,
3492                              (u64 __user *)reg->addr);
3493                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3494                         kvm_clear_async_pf_completion_queue(vcpu);
3495                 break;
3496         case KVM_REG_S390_PFCOMPARE:
3497                 r = get_user(vcpu->arch.pfault_compare,
3498                              (u64 __user *)reg->addr);
3499                 break;
3500         case KVM_REG_S390_PFSELECT:
3501                 r = get_user(vcpu->arch.pfault_select,
3502                              (u64 __user *)reg->addr);
3503                 break;
3504         case KVM_REG_S390_PP:
3505                 r = get_user(vcpu->arch.sie_block->pp,
3506                              (u64 __user *)reg->addr);
3507                 break;
3508         case KVM_REG_S390_GBEA:
3509                 r = get_user(vcpu->arch.sie_block->gbea,
3510                              (u64 __user *)reg->addr);
3511                 break;
3512         default:
3513                 break;
3514         }
3515
3516         return r;
3517 }
3518
3519 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3520 {
3521         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3522         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3523         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3524
3525         kvm_clear_async_pf_completion_queue(vcpu);
3526         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3527                 kvm_s390_vcpu_stop(vcpu);
3528         kvm_s390_clear_local_irqs(vcpu);
3529 }
3530
3531 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3532 {
3533         /* Initial reset is a superset of the normal reset */
3534         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3535
3536         /*
3537          * This equals initial cpu reset in pop, but we don't switch to ESA.
3538          * We do not only reset the internal data, but also ...
3539          */
3540         vcpu->arch.sie_block->gpsw.mask = 0;
3541         vcpu->arch.sie_block->gpsw.addr = 0;
3542         kvm_s390_set_prefix(vcpu, 0);
3543         kvm_s390_set_cpu_timer(vcpu, 0);
3544         vcpu->arch.sie_block->ckc = 0;
3545         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3546         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3547         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3548
3549         /* ... the data in sync regs */
3550         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3551         vcpu->run->s.regs.ckc = 0;
3552         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3553         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3554         vcpu->run->psw_addr = 0;
3555         vcpu->run->psw_mask = 0;
3556         vcpu->run->s.regs.todpr = 0;
3557         vcpu->run->s.regs.cputm = 0;
3558         vcpu->run->s.regs.ckc = 0;
3559         vcpu->run->s.regs.pp = 0;
3560         vcpu->run->s.regs.gbea = 1;
3561         vcpu->run->s.regs.fpc = 0;
3562         /*
3563          * Do not reset these registers in the protected case, as some of
3564          * them are overlayed and they are not accessible in this case
3565          * anyway.
3566          */
3567         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3568                 vcpu->arch.sie_block->gbea = 1;
3569                 vcpu->arch.sie_block->pp = 0;
3570                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3571                 vcpu->arch.sie_block->todpr = 0;
3572         }
3573 }
3574
3575 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3576 {
3577         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3578
3579         /* Clear reset is a superset of the initial reset */
3580         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3581
3582         memset(&regs->gprs, 0, sizeof(regs->gprs));
3583         memset(&regs->vrs, 0, sizeof(regs->vrs));
3584         memset(&regs->acrs, 0, sizeof(regs->acrs));
3585         memset(&regs->gscb, 0, sizeof(regs->gscb));
3586
3587         regs->etoken = 0;
3588         regs->etoken_extension = 0;
3589 }
3590
3591 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3592 {
3593         vcpu_load(vcpu);
3594         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3595         vcpu_put(vcpu);
3596         return 0;
3597 }
3598
3599 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3600 {
3601         vcpu_load(vcpu);
3602         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3603         vcpu_put(vcpu);
3604         return 0;
3605 }
3606
3607 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3608                                   struct kvm_sregs *sregs)
3609 {
3610         vcpu_load(vcpu);
3611
3612         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3613         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3614
3615         vcpu_put(vcpu);
3616         return 0;
3617 }
3618
3619 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3620                                   struct kvm_sregs *sregs)
3621 {
3622         vcpu_load(vcpu);
3623
3624         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3625         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3626
3627         vcpu_put(vcpu);
3628         return 0;
3629 }
3630
3631 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3632 {
3633         int ret = 0;
3634
3635         vcpu_load(vcpu);
3636
3637         if (test_fp_ctl(fpu->fpc)) {
3638                 ret = -EINVAL;
3639                 goto out;
3640         }
3641         vcpu->run->s.regs.fpc = fpu->fpc;
3642         if (MACHINE_HAS_VX)
3643                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3644                                  (freg_t *) fpu->fprs);
3645         else
3646                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3647
3648 out:
3649         vcpu_put(vcpu);
3650         return ret;
3651 }
3652
3653 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3654 {
3655         vcpu_load(vcpu);
3656
3657         /* make sure we have the latest values */
3658         save_fpu_regs();
3659         if (MACHINE_HAS_VX)
3660                 convert_vx_to_fp((freg_t *) fpu->fprs,
3661                                  (__vector128 *) vcpu->run->s.regs.vrs);
3662         else
3663                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3664         fpu->fpc = vcpu->run->s.regs.fpc;
3665
3666         vcpu_put(vcpu);
3667         return 0;
3668 }
3669
3670 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3671 {
3672         int rc = 0;
3673
3674         if (!is_vcpu_stopped(vcpu))
3675                 rc = -EBUSY;
3676         else {
3677                 vcpu->run->psw_mask = psw.mask;
3678                 vcpu->run->psw_addr = psw.addr;
3679         }
3680         return rc;
3681 }
3682
3683 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3684                                   struct kvm_translation *tr)
3685 {
3686         return -EINVAL; /* not implemented yet */
3687 }
3688
3689 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3690                               KVM_GUESTDBG_USE_HW_BP | \
3691                               KVM_GUESTDBG_ENABLE)
3692
3693 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3694                                         struct kvm_guest_debug *dbg)
3695 {
3696         int rc = 0;
3697
3698         vcpu_load(vcpu);
3699
3700         vcpu->guest_debug = 0;
3701         kvm_s390_clear_bp_data(vcpu);
3702
3703         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3704                 rc = -EINVAL;
3705                 goto out;
3706         }
3707         if (!sclp.has_gpere) {
3708                 rc = -EINVAL;
3709                 goto out;
3710         }
3711
3712         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3713                 vcpu->guest_debug = dbg->control;
3714                 /* enforce guest PER */
3715                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3716
3717                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3718                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3719         } else {
3720                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3721                 vcpu->arch.guestdbg.last_bp = 0;
3722         }
3723
3724         if (rc) {
3725                 vcpu->guest_debug = 0;
3726                 kvm_s390_clear_bp_data(vcpu);
3727                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3728         }
3729
3730 out:
3731         vcpu_put(vcpu);
3732         return rc;
3733 }
3734
3735 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3736                                     struct kvm_mp_state *mp_state)
3737 {
3738         int ret;
3739
3740         vcpu_load(vcpu);
3741
3742         /* CHECK_STOP and LOAD are not supported yet */
3743         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3744                                       KVM_MP_STATE_OPERATING;
3745
3746         vcpu_put(vcpu);
3747         return ret;
3748 }
3749
3750 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3751                                     struct kvm_mp_state *mp_state)
3752 {
3753         int rc = 0;
3754
3755         vcpu_load(vcpu);
3756
3757         /* user space knows about this interface - let it control the state */
3758         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3759
3760         switch (mp_state->mp_state) {
3761         case KVM_MP_STATE_STOPPED:
3762                 rc = kvm_s390_vcpu_stop(vcpu);
3763                 break;
3764         case KVM_MP_STATE_OPERATING:
3765                 rc = kvm_s390_vcpu_start(vcpu);
3766                 break;
3767         case KVM_MP_STATE_LOAD:
3768                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3769                         rc = -ENXIO;
3770                         break;
3771                 }
3772                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3773                 break;
3774         case KVM_MP_STATE_CHECK_STOP:
3775                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3776         default:
3777                 rc = -ENXIO;
3778         }
3779
3780         vcpu_put(vcpu);
3781         return rc;
3782 }
3783
3784 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3785 {
3786         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3787 }
3788
3789 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3790 {
3791 retry:
3792         kvm_s390_vcpu_request_handled(vcpu);
3793         if (!kvm_request_pending(vcpu))
3794                 return 0;
3795         /*
3796          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3797          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3798          * This ensures that the ipte instruction for this request has
3799          * already finished. We might race against a second unmapper that
3800          * wants to set the blocking bit. Lets just retry the request loop.
3801          */
3802         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3803                 int rc;
3804                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3805                                           kvm_s390_get_prefix(vcpu),
3806                                           PAGE_SIZE * 2, PROT_WRITE);
3807                 if (rc) {
3808                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3809                         return rc;
3810                 }
3811                 goto retry;
3812         }
3813
3814         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3815                 vcpu->arch.sie_block->ihcpu = 0xffff;
3816                 goto retry;
3817         }
3818
3819         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3820                 if (!ibs_enabled(vcpu)) {
3821                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3822                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3823                 }
3824                 goto retry;
3825         }
3826
3827         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3828                 if (ibs_enabled(vcpu)) {
3829                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3830                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3831                 }
3832                 goto retry;
3833         }
3834
3835         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3836                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3837                 goto retry;
3838         }
3839
3840         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3841                 /*
3842                  * Disable CMM virtualization; we will emulate the ESSA
3843                  * instruction manually, in order to provide additional
3844                  * functionalities needed for live migration.
3845                  */
3846                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3847                 goto retry;
3848         }
3849
3850         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3851                 /*
3852                  * Re-enable CMM virtualization if CMMA is available and
3853                  * CMM has been used.
3854                  */
3855                 if ((vcpu->kvm->arch.use_cmma) &&
3856                     (vcpu->kvm->mm->context.uses_cmm))
3857                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3858                 goto retry;
3859         }
3860
3861         /* nothing to do, just clear the request */
3862         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3863         /* we left the vsie handler, nothing to do, just clear the request */
3864         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3865
3866         return 0;
3867 }
3868
3869 void kvm_s390_set_tod_clock(struct kvm *kvm,
3870                             const struct kvm_s390_vm_tod_clock *gtod)
3871 {
3872         struct kvm_vcpu *vcpu;
3873         union tod_clock clk;
3874         int i;
3875
3876         mutex_lock(&kvm->lock);
3877         preempt_disable();
3878
3879         store_tod_clock_ext(&clk);
3880
3881         kvm->arch.epoch = gtod->tod - clk.tod;
3882         kvm->arch.epdx = 0;
3883         if (test_kvm_facility(kvm, 139)) {
3884                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3885                 if (kvm->arch.epoch > gtod->tod)
3886                         kvm->arch.epdx -= 1;
3887         }
3888
3889         kvm_s390_vcpu_block_all(kvm);
3890         kvm_for_each_vcpu(i, vcpu, kvm) {
3891                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3892                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3893         }
3894
3895         kvm_s390_vcpu_unblock_all(kvm);
3896         preempt_enable();
3897         mutex_unlock(&kvm->lock);
3898 }
3899
3900 /**
3901  * kvm_arch_fault_in_page - fault-in guest page if necessary
3902  * @vcpu: The corresponding virtual cpu
3903  * @gpa: Guest physical address
3904  * @writable: Whether the page should be writable or not
3905  *
3906  * Make sure that a guest page has been faulted-in on the host.
3907  *
3908  * Return: Zero on success, negative error code otherwise.
3909  */
3910 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3911 {
3912         return gmap_fault(vcpu->arch.gmap, gpa,
3913                           writable ? FAULT_FLAG_WRITE : 0);
3914 }
3915
3916 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3917                                       unsigned long token)
3918 {
3919         struct kvm_s390_interrupt inti;
3920         struct kvm_s390_irq irq;
3921
3922         if (start_token) {
3923                 irq.u.ext.ext_params2 = token;
3924                 irq.type = KVM_S390_INT_PFAULT_INIT;
3925                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3926         } else {
3927                 inti.type = KVM_S390_INT_PFAULT_DONE;
3928                 inti.parm64 = token;
3929                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3930         }
3931 }
3932
3933 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3934                                      struct kvm_async_pf *work)
3935 {
3936         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3937         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3938
3939         return true;
3940 }
3941
3942 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3943                                  struct kvm_async_pf *work)
3944 {
3945         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3946         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3947 }
3948
3949 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3950                                struct kvm_async_pf *work)
3951 {
3952         /* s390 will always inject the page directly */
3953 }
3954
3955 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3956 {
3957         /*
3958          * s390 will always inject the page directly,
3959          * but we still want check_async_completion to cleanup
3960          */
3961         return true;
3962 }
3963
3964 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3965 {
3966         hva_t hva;
3967         struct kvm_arch_async_pf arch;
3968
3969         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3970                 return false;
3971         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3972             vcpu->arch.pfault_compare)
3973                 return false;
3974         if (psw_extint_disabled(vcpu))
3975                 return false;
3976         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3977                 return false;
3978         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3979                 return false;
3980         if (!vcpu->arch.gmap->pfault_enabled)
3981                 return false;
3982
3983         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3984         hva += current->thread.gmap_addr & ~PAGE_MASK;
3985         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3986                 return false;
3987
3988         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3989 }
3990
3991 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3992 {
3993         int rc, cpuflags;
3994
3995         /*
3996          * On s390 notifications for arriving pages will be delivered directly
3997          * to the guest but the house keeping for completed pfaults is
3998          * handled outside the worker.
3999          */
4000         kvm_check_async_pf_completion(vcpu);
4001
4002         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4003         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4004
4005         if (need_resched())
4006                 schedule();
4007
4008         if (!kvm_is_ucontrol(vcpu->kvm)) {
4009                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4010                 if (rc)
4011                         return rc;
4012         }
4013
4014         rc = kvm_s390_handle_requests(vcpu);
4015         if (rc)
4016                 return rc;
4017
4018         if (guestdbg_enabled(vcpu)) {
4019                 kvm_s390_backup_guest_per_regs(vcpu);
4020                 kvm_s390_patch_guest_per_regs(vcpu);
4021         }
4022
4023         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4024
4025         vcpu->arch.sie_block->icptcode = 0;
4026         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4027         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4028         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4029
4030         return 0;
4031 }
4032
4033 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4034 {
4035         struct kvm_s390_pgm_info pgm_info = {
4036                 .code = PGM_ADDRESSING,
4037         };
4038         u8 opcode, ilen;
4039         int rc;
4040
4041         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4042         trace_kvm_s390_sie_fault(vcpu);
4043
4044         /*
4045          * We want to inject an addressing exception, which is defined as a
4046          * suppressing or terminating exception. However, since we came here
4047          * by a DAT access exception, the PSW still points to the faulting
4048          * instruction since DAT exceptions are nullifying. So we've got
4049          * to look up the current opcode to get the length of the instruction
4050          * to be able to forward the PSW.
4051          */
4052         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4053         ilen = insn_length(opcode);
4054         if (rc < 0) {
4055                 return rc;
4056         } else if (rc) {
4057                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4058                  * Forward by arbitrary ilc, injection will take care of
4059                  * nullification if necessary.
4060                  */
4061                 pgm_info = vcpu->arch.pgm;
4062                 ilen = 4;
4063         }
4064         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4065         kvm_s390_forward_psw(vcpu, ilen);
4066         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4067 }
4068
4069 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4070 {
4071         struct mcck_volatile_info *mcck_info;
4072         struct sie_page *sie_page;
4073
4074         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4075                    vcpu->arch.sie_block->icptcode);
4076         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4077
4078         if (guestdbg_enabled(vcpu))
4079                 kvm_s390_restore_guest_per_regs(vcpu);
4080
4081         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4082         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4083
4084         if (exit_reason == -EINTR) {
4085                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4086                 sie_page = container_of(vcpu->arch.sie_block,
4087                                         struct sie_page, sie_block);
4088                 mcck_info = &sie_page->mcck_info;
4089                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4090                 return 0;
4091         }
4092
4093         if (vcpu->arch.sie_block->icptcode > 0) {
4094                 int rc = kvm_handle_sie_intercept(vcpu);
4095
4096                 if (rc != -EOPNOTSUPP)
4097                         return rc;
4098                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4099                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4100                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4101                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4102                 return -EREMOTE;
4103         } else if (exit_reason != -EFAULT) {
4104                 vcpu->stat.exit_null++;
4105                 return 0;
4106         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4107                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4108                 vcpu->run->s390_ucontrol.trans_exc_code =
4109                                                 current->thread.gmap_addr;
4110                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4111                 return -EREMOTE;
4112         } else if (current->thread.gmap_pfault) {
4113                 trace_kvm_s390_major_guest_pfault(vcpu);
4114                 current->thread.gmap_pfault = 0;
4115                 if (kvm_arch_setup_async_pf(vcpu))
4116                         return 0;
4117                 vcpu->stat.pfault_sync++;
4118                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4119         }
4120         return vcpu_post_run_fault_in_sie(vcpu);
4121 }
4122
4123 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4124 static int __vcpu_run(struct kvm_vcpu *vcpu)
4125 {
4126         int rc, exit_reason;
4127         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4128
4129         /*
4130          * We try to hold kvm->srcu during most of vcpu_run (except when run-
4131          * ning the guest), so that memslots (and other stuff) are protected
4132          */
4133         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4134
4135         do {
4136                 rc = vcpu_pre_run(vcpu);
4137                 if (rc)
4138                         break;
4139
4140                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4141                 /*
4142                  * As PF_VCPU will be used in fault handler, between
4143                  * guest_enter and guest_exit should be no uaccess.
4144                  */
4145                 local_irq_disable();
4146                 guest_enter_irqoff();
4147                 __disable_cpu_timer_accounting(vcpu);
4148                 local_irq_enable();
4149                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4150                         memcpy(sie_page->pv_grregs,
4151                                vcpu->run->s.regs.gprs,
4152                                sizeof(sie_page->pv_grregs));
4153                 }
4154                 if (test_cpu_flag(CIF_FPU))
4155                         load_fpu_regs();
4156                 exit_reason = sie64a(vcpu->arch.sie_block,
4157                                      vcpu->run->s.regs.gprs);
4158                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4159                         memcpy(vcpu->run->s.regs.gprs,
4160                                sie_page->pv_grregs,
4161                                sizeof(sie_page->pv_grregs));
4162                         /*
4163                          * We're not allowed to inject interrupts on intercepts
4164                          * that leave the guest state in an "in-between" state
4165                          * where the next SIE entry will do a continuation.
4166                          * Fence interrupts in our "internal" PSW.
4167                          */
4168                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4169                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4170                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4171                         }
4172                 }
4173                 local_irq_disable();
4174                 __enable_cpu_timer_accounting(vcpu);
4175                 guest_exit_irqoff();
4176                 local_irq_enable();
4177                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4178
4179                 rc = vcpu_post_run(vcpu, exit_reason);
4180         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4181
4182         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4183         return rc;
4184 }
4185
4186 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4187 {
4188         struct kvm_run *kvm_run = vcpu->run;
4189         struct runtime_instr_cb *riccb;
4190         struct gs_cb *gscb;
4191
4192         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4193         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4194         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4195         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4196         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4197                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4198                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4199                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4200         }
4201         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4202                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4203                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4204                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4205                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4206                         kvm_clear_async_pf_completion_queue(vcpu);
4207         }
4208         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4209                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4210                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4211         }
4212         /*
4213          * If userspace sets the riccb (e.g. after migration) to a valid state,
4214          * we should enable RI here instead of doing the lazy enablement.
4215          */
4216         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4217             test_kvm_facility(vcpu->kvm, 64) &&
4218             riccb->v &&
4219             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4220                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4221                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4222         }
4223         /*
4224          * If userspace sets the gscb (e.g. after migration) to non-zero,
4225          * we should enable GS here instead of doing the lazy enablement.
4226          */
4227         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4228             test_kvm_facility(vcpu->kvm, 133) &&
4229             gscb->gssm &&
4230             !vcpu->arch.gs_enabled) {
4231                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4232                 vcpu->arch.sie_block->ecb |= ECB_GS;
4233                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4234                 vcpu->arch.gs_enabled = 1;
4235         }
4236         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4237             test_kvm_facility(vcpu->kvm, 82)) {
4238                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4239                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4240         }
4241         if (MACHINE_HAS_GS) {
4242                 preempt_disable();
4243                 __ctl_set_bit(2, 4);
4244                 if (current->thread.gs_cb) {
4245                         vcpu->arch.host_gscb = current->thread.gs_cb;
4246                         save_gs_cb(vcpu->arch.host_gscb);
4247                 }
4248                 if (vcpu->arch.gs_enabled) {
4249                         current->thread.gs_cb = (struct gs_cb *)
4250                                                 &vcpu->run->s.regs.gscb;
4251                         restore_gs_cb(current->thread.gs_cb);
4252                 }
4253                 preempt_enable();
4254         }
4255         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4256 }
4257
4258 static void sync_regs(struct kvm_vcpu *vcpu)
4259 {
4260         struct kvm_run *kvm_run = vcpu->run;
4261
4262         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4263                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4264         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4265                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4266                 /* some control register changes require a tlb flush */
4267                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4268         }
4269         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4270                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4271                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4272         }
4273         save_access_regs(vcpu->arch.host_acrs);
4274         restore_access_regs(vcpu->run->s.regs.acrs);
4275         /* save host (userspace) fprs/vrs */
4276         save_fpu_regs();
4277         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4278         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4279         if (MACHINE_HAS_VX)
4280                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4281         else
4282                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4283         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4284         if (test_fp_ctl(current->thread.fpu.fpc))
4285                 /* User space provided an invalid FPC, let's clear it */
4286                 current->thread.fpu.fpc = 0;
4287
4288         /* Sync fmt2 only data */
4289         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4290                 sync_regs_fmt2(vcpu);
4291         } else {
4292                 /*
4293                  * In several places we have to modify our internal view to
4294                  * not do things that are disallowed by the ultravisor. For
4295                  * example we must not inject interrupts after specific exits
4296                  * (e.g. 112 prefix page not secure). We do this by turning
4297                  * off the machine check, external and I/O interrupt bits
4298                  * of our PSW copy. To avoid getting validity intercepts, we
4299                  * do only accept the condition code from userspace.
4300                  */
4301                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4302                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4303                                                    PSW_MASK_CC;
4304         }
4305
4306         kvm_run->kvm_dirty_regs = 0;
4307 }
4308
4309 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4310 {
4311         struct kvm_run *kvm_run = vcpu->run;
4312
4313         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4314         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4315         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4316         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4317         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4318         if (MACHINE_HAS_GS) {
4319                 preempt_disable();
4320                 __ctl_set_bit(2, 4);
4321                 if (vcpu->arch.gs_enabled)
4322                         save_gs_cb(current->thread.gs_cb);
4323                 current->thread.gs_cb = vcpu->arch.host_gscb;
4324                 restore_gs_cb(vcpu->arch.host_gscb);
4325                 if (!vcpu->arch.host_gscb)
4326                         __ctl_clear_bit(2, 4);
4327                 vcpu->arch.host_gscb = NULL;
4328                 preempt_enable();
4329         }
4330         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4331 }
4332
4333 static void store_regs(struct kvm_vcpu *vcpu)
4334 {
4335         struct kvm_run *kvm_run = vcpu->run;
4336
4337         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4338         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4339         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4340         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4341         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4342         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4343         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4344         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4345         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4346         save_access_regs(vcpu->run->s.regs.acrs);
4347         restore_access_regs(vcpu->arch.host_acrs);
4348         /* Save guest register state */
4349         save_fpu_regs();
4350         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4351         /* Restore will be done lazily at return */
4352         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4353         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4354         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4355                 store_regs_fmt2(vcpu);
4356 }
4357
4358 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4359 {
4360         struct kvm_run *kvm_run = vcpu->run;
4361         int rc;
4362
4363         if (kvm_run->immediate_exit)
4364                 return -EINTR;
4365
4366         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4367             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4368                 return -EINVAL;
4369
4370         vcpu_load(vcpu);
4371
4372         if (guestdbg_exit_pending(vcpu)) {
4373                 kvm_s390_prepare_debug_exit(vcpu);
4374                 rc = 0;
4375                 goto out;
4376         }
4377
4378         kvm_sigset_activate(vcpu);
4379
4380         /*
4381          * no need to check the return value of vcpu_start as it can only have
4382          * an error for protvirt, but protvirt means user cpu state
4383          */
4384         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4385                 kvm_s390_vcpu_start(vcpu);
4386         } else if (is_vcpu_stopped(vcpu)) {
4387                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4388                                    vcpu->vcpu_id);
4389                 rc = -EINVAL;
4390                 goto out;
4391         }
4392
4393         sync_regs(vcpu);
4394         enable_cpu_timer_accounting(vcpu);
4395
4396         might_fault();
4397         rc = __vcpu_run(vcpu);
4398
4399         if (signal_pending(current) && !rc) {
4400                 kvm_run->exit_reason = KVM_EXIT_INTR;
4401                 rc = -EINTR;
4402         }
4403
4404         if (guestdbg_exit_pending(vcpu) && !rc)  {
4405                 kvm_s390_prepare_debug_exit(vcpu);
4406                 rc = 0;
4407         }
4408
4409         if (rc == -EREMOTE) {
4410                 /* userspace support is needed, kvm_run has been prepared */
4411                 rc = 0;
4412         }
4413
4414         disable_cpu_timer_accounting(vcpu);
4415         store_regs(vcpu);
4416
4417         kvm_sigset_deactivate(vcpu);
4418
4419         vcpu->stat.exit_userspace++;
4420 out:
4421         vcpu_put(vcpu);
4422         return rc;
4423 }
4424
4425 /*
4426  * store status at address
4427  * we use have two special cases:
4428  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4429  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4430  */
4431 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4432 {
4433         unsigned char archmode = 1;
4434         freg_t fprs[NUM_FPRS];
4435         unsigned int px;
4436         u64 clkcomp, cputm;
4437         int rc;
4438
4439         px = kvm_s390_get_prefix(vcpu);
4440         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4441                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4442                         return -EFAULT;
4443                 gpa = 0;
4444         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4445                 if (write_guest_real(vcpu, 163, &archmode, 1))
4446                         return -EFAULT;
4447                 gpa = px;
4448         } else
4449                 gpa -= __LC_FPREGS_SAVE_AREA;
4450
4451         /* manually convert vector registers if necessary */
4452         if (MACHINE_HAS_VX) {
4453                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4454                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4455                                      fprs, 128);
4456         } else {
4457                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4458                                      vcpu->run->s.regs.fprs, 128);
4459         }
4460         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4461                               vcpu->run->s.regs.gprs, 128);
4462         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4463                               &vcpu->arch.sie_block->gpsw, 16);
4464         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4465                               &px, 4);
4466         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4467                               &vcpu->run->s.regs.fpc, 4);
4468         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4469                               &vcpu->arch.sie_block->todpr, 4);
4470         cputm = kvm_s390_get_cpu_timer(vcpu);
4471         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4472                               &cputm, 8);
4473         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4474         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4475                               &clkcomp, 8);
4476         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4477                               &vcpu->run->s.regs.acrs, 64);
4478         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4479                               &vcpu->arch.sie_block->gcr, 128);
4480         return rc ? -EFAULT : 0;
4481 }
4482
4483 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4484 {
4485         /*
4486          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4487          * switch in the run ioctl. Let's update our copies before we save
4488          * it into the save area
4489          */
4490         save_fpu_regs();
4491         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4492         save_access_regs(vcpu->run->s.regs.acrs);
4493
4494         return kvm_s390_store_status_unloaded(vcpu, addr);
4495 }
4496
4497 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4498 {
4499         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4500         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4501 }
4502
4503 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4504 {
4505         unsigned int i;
4506         struct kvm_vcpu *vcpu;
4507
4508         kvm_for_each_vcpu(i, vcpu, kvm) {
4509                 __disable_ibs_on_vcpu(vcpu);
4510         }
4511 }
4512
4513 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4514 {
4515         if (!sclp.has_ibs)
4516                 return;
4517         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4518         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4519 }
4520
4521 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4522 {
4523         int i, online_vcpus, r = 0, started_vcpus = 0;
4524
4525         if (!is_vcpu_stopped(vcpu))
4526                 return 0;
4527
4528         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4529         /* Only one cpu at a time may enter/leave the STOPPED state. */
4530         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4531         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4532
4533         /* Let's tell the UV that we want to change into the operating state */
4534         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4535                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4536                 if (r) {
4537                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4538                         return r;
4539                 }
4540         }
4541
4542         for (i = 0; i < online_vcpus; i++) {
4543                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4544                         started_vcpus++;
4545         }
4546
4547         if (started_vcpus == 0) {
4548                 /* we're the only active VCPU -> speed it up */
4549                 __enable_ibs_on_vcpu(vcpu);
4550         } else if (started_vcpus == 1) {
4551                 /*
4552                  * As we are starting a second VCPU, we have to disable
4553                  * the IBS facility on all VCPUs to remove potentially
4554                  * outstanding ENABLE requests.
4555                  */
4556                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4557         }
4558
4559         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4560         /*
4561          * The real PSW might have changed due to a RESTART interpreted by the
4562          * ultravisor. We block all interrupts and let the next sie exit
4563          * refresh our view.
4564          */
4565         if (kvm_s390_pv_cpu_is_protected(vcpu))
4566                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4567         /*
4568          * Another VCPU might have used IBS while we were offline.
4569          * Let's play safe and flush the VCPU at startup.
4570          */
4571         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4572         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4573         return 0;
4574 }
4575
4576 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4577 {
4578         int i, online_vcpus, r = 0, started_vcpus = 0;
4579         struct kvm_vcpu *started_vcpu = NULL;
4580
4581         if (is_vcpu_stopped(vcpu))
4582                 return 0;
4583
4584         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4585         /* Only one cpu at a time may enter/leave the STOPPED state. */
4586         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4587         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4588
4589         /* Let's tell the UV that we want to change into the stopped state */
4590         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4591                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4592                 if (r) {
4593                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4594                         return r;
4595                 }
4596         }
4597
4598         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
4599         kvm_s390_clear_stop_irq(vcpu);
4600
4601         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4602         __disable_ibs_on_vcpu(vcpu);
4603
4604         for (i = 0; i < online_vcpus; i++) {
4605                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4606                         started_vcpus++;
4607                         started_vcpu = vcpu->kvm->vcpus[i];
4608                 }
4609         }
4610
4611         if (started_vcpus == 1) {
4612                 /*
4613                  * As we only have one VCPU left, we want to enable the
4614                  * IBS facility for that VCPU to speed it up.
4615                  */
4616                 __enable_ibs_on_vcpu(started_vcpu);
4617         }
4618
4619         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4620         return 0;
4621 }
4622
4623 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4624                                      struct kvm_enable_cap *cap)
4625 {
4626         int r;
4627
4628         if (cap->flags)
4629                 return -EINVAL;
4630
4631         switch (cap->cap) {
4632         case KVM_CAP_S390_CSS_SUPPORT:
4633                 if (!vcpu->kvm->arch.css_support) {
4634                         vcpu->kvm->arch.css_support = 1;
4635                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4636                         trace_kvm_s390_enable_css(vcpu->kvm);
4637                 }
4638                 r = 0;
4639                 break;
4640         default:
4641                 r = -EINVAL;
4642                 break;
4643         }
4644         return r;
4645 }
4646
4647 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4648                                    struct kvm_s390_mem_op *mop)
4649 {
4650         void __user *uaddr = (void __user *)mop->buf;
4651         int r = 0;
4652
4653         if (mop->flags || !mop->size)
4654                 return -EINVAL;
4655         if (mop->size + mop->sida_offset < mop->size)
4656                 return -EINVAL;
4657         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4658                 return -E2BIG;
4659
4660         switch (mop->op) {
4661         case KVM_S390_MEMOP_SIDA_READ:
4662                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4663                                  mop->sida_offset), mop->size))
4664                         r = -EFAULT;
4665
4666                 break;
4667         case KVM_S390_MEMOP_SIDA_WRITE:
4668                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4669                                    mop->sida_offset), uaddr, mop->size))
4670                         r = -EFAULT;
4671                 break;
4672         }
4673         return r;
4674 }
4675 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4676                                   struct kvm_s390_mem_op *mop)
4677 {
4678         void __user *uaddr = (void __user *)mop->buf;
4679         void *tmpbuf = NULL;
4680         int r = 0;
4681         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4682                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4683
4684         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4685                 return -EINVAL;
4686
4687         if (mop->size > MEM_OP_MAX_SIZE)
4688                 return -E2BIG;
4689
4690         if (kvm_s390_pv_cpu_is_protected(vcpu))
4691                 return -EINVAL;
4692
4693         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4694                 tmpbuf = vmalloc(mop->size);
4695                 if (!tmpbuf)
4696                         return -ENOMEM;
4697         }
4698
4699         switch (mop->op) {
4700         case KVM_S390_MEMOP_LOGICAL_READ:
4701                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4702                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4703                                             mop->size, GACC_FETCH);
4704                         break;
4705                 }
4706                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4707                 if (r == 0) {
4708                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4709                                 r = -EFAULT;
4710                 }
4711                 break;
4712         case KVM_S390_MEMOP_LOGICAL_WRITE:
4713                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4714                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4715                                             mop->size, GACC_STORE);
4716                         break;
4717                 }
4718                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4719                         r = -EFAULT;
4720                         break;
4721                 }
4722                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4723                 break;
4724         }
4725
4726         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4727                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4728
4729         vfree(tmpbuf);
4730         return r;
4731 }
4732
4733 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4734                                       struct kvm_s390_mem_op *mop)
4735 {
4736         int r, srcu_idx;
4737
4738         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4739
4740         switch (mop->op) {
4741         case KVM_S390_MEMOP_LOGICAL_READ:
4742         case KVM_S390_MEMOP_LOGICAL_WRITE:
4743                 r = kvm_s390_guest_mem_op(vcpu, mop);
4744                 break;
4745         case KVM_S390_MEMOP_SIDA_READ:
4746         case KVM_S390_MEMOP_SIDA_WRITE:
4747                 /* we are locked against sida going away by the vcpu->mutex */
4748                 r = kvm_s390_guest_sida_op(vcpu, mop);
4749                 break;
4750         default:
4751                 r = -EINVAL;
4752         }
4753
4754         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4755         return r;
4756 }
4757
4758 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4759                                unsigned int ioctl, unsigned long arg)
4760 {
4761         struct kvm_vcpu *vcpu = filp->private_data;
4762         void __user *argp = (void __user *)arg;
4763
4764         switch (ioctl) {
4765         case KVM_S390_IRQ: {
4766                 struct kvm_s390_irq s390irq;
4767
4768                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4769                         return -EFAULT;
4770                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4771         }
4772         case KVM_S390_INTERRUPT: {
4773                 struct kvm_s390_interrupt s390int;
4774                 struct kvm_s390_irq s390irq = {};
4775
4776                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4777                         return -EFAULT;
4778                 if (s390int_to_s390irq(&s390int, &s390irq))
4779                         return -EINVAL;
4780                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4781         }
4782         }
4783         return -ENOIOCTLCMD;
4784 }
4785
4786 long kvm_arch_vcpu_ioctl(struct file *filp,
4787                          unsigned int ioctl, unsigned long arg)
4788 {
4789         struct kvm_vcpu *vcpu = filp->private_data;
4790         void __user *argp = (void __user *)arg;
4791         int idx;
4792         long r;
4793         u16 rc, rrc;
4794
4795         vcpu_load(vcpu);
4796
4797         switch (ioctl) {
4798         case KVM_S390_STORE_STATUS:
4799                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4800                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4801                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4802                 break;
4803         case KVM_S390_SET_INITIAL_PSW: {
4804                 psw_t psw;
4805
4806                 r = -EFAULT;
4807                 if (copy_from_user(&psw, argp, sizeof(psw)))
4808                         break;
4809                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4810                 break;
4811         }
4812         case KVM_S390_CLEAR_RESET:
4813                 r = 0;
4814                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4815                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4816                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4817                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4818                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4819                                    rc, rrc);
4820                 }
4821                 break;
4822         case KVM_S390_INITIAL_RESET:
4823                 r = 0;
4824                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4825                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4826                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4827                                           UVC_CMD_CPU_RESET_INITIAL,
4828                                           &rc, &rrc);
4829                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4830                                    rc, rrc);
4831                 }
4832                 break;
4833         case KVM_S390_NORMAL_RESET:
4834                 r = 0;
4835                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4836                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4839                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4840                                    rc, rrc);
4841                 }
4842                 break;
4843         case KVM_SET_ONE_REG:
4844         case KVM_GET_ONE_REG: {
4845                 struct kvm_one_reg reg;
4846                 r = -EINVAL;
4847                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4848                         break;
4849                 r = -EFAULT;
4850                 if (copy_from_user(&reg, argp, sizeof(reg)))
4851                         break;
4852                 if (ioctl == KVM_SET_ONE_REG)
4853                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4854                 else
4855                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4856                 break;
4857         }
4858 #ifdef CONFIG_KVM_S390_UCONTROL
4859         case KVM_S390_UCAS_MAP: {
4860                 struct kvm_s390_ucas_mapping ucasmap;
4861
4862                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4863                         r = -EFAULT;
4864                         break;
4865                 }
4866
4867                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4868                         r = -EINVAL;
4869                         break;
4870                 }
4871
4872                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4873                                      ucasmap.vcpu_addr, ucasmap.length);
4874                 break;
4875         }
4876         case KVM_S390_UCAS_UNMAP: {
4877                 struct kvm_s390_ucas_mapping ucasmap;
4878
4879                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4880                         r = -EFAULT;
4881                         break;
4882                 }
4883
4884                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4885                         r = -EINVAL;
4886                         break;
4887                 }
4888
4889                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4890                         ucasmap.length);
4891                 break;
4892         }
4893 #endif
4894         case KVM_S390_VCPU_FAULT: {
4895                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4896                 break;
4897         }
4898         case KVM_ENABLE_CAP:
4899         {
4900                 struct kvm_enable_cap cap;
4901                 r = -EFAULT;
4902                 if (copy_from_user(&cap, argp, sizeof(cap)))
4903                         break;
4904                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4905                 break;
4906         }
4907         case KVM_S390_MEM_OP: {
4908                 struct kvm_s390_mem_op mem_op;
4909
4910                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4911                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4912                 else
4913                         r = -EFAULT;
4914                 break;
4915         }
4916         case KVM_S390_SET_IRQ_STATE: {
4917                 struct kvm_s390_irq_state irq_state;
4918
4919                 r = -EFAULT;
4920                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4921                         break;
4922                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4923                     irq_state.len == 0 ||
4924                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4925                         r = -EINVAL;
4926                         break;
4927                 }
4928                 /* do not use irq_state.flags, it will break old QEMUs */
4929                 r = kvm_s390_set_irq_state(vcpu,
4930                                            (void __user *) irq_state.buf,
4931                                            irq_state.len);
4932                 break;
4933         }
4934         case KVM_S390_GET_IRQ_STATE: {
4935                 struct kvm_s390_irq_state irq_state;
4936
4937                 r = -EFAULT;
4938                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4939                         break;
4940                 if (irq_state.len == 0) {
4941                         r = -EINVAL;
4942                         break;
4943                 }
4944                 /* do not use irq_state.flags, it will break old QEMUs */
4945                 r = kvm_s390_get_irq_state(vcpu,
4946                                            (__u8 __user *)  irq_state.buf,
4947                                            irq_state.len);
4948                 break;
4949         }
4950         default:
4951                 r = -ENOTTY;
4952         }
4953
4954         vcpu_put(vcpu);
4955         return r;
4956 }
4957
4958 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4959 {
4960 #ifdef CONFIG_KVM_S390_UCONTROL
4961         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4962                  && (kvm_is_ucontrol(vcpu->kvm))) {
4963                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4964                 get_page(vmf->page);
4965                 return 0;
4966         }
4967 #endif
4968         return VM_FAULT_SIGBUS;
4969 }
4970
4971 /* Section: memory related */
4972 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4973                                    struct kvm_memory_slot *memslot,
4974                                    const struct kvm_userspace_memory_region *mem,
4975                                    enum kvm_mr_change change)
4976 {
4977         /* A few sanity checks. We can have memory slots which have to be
4978            located/ended at a segment boundary (1MB). The memory in userland is
4979            ok to be fragmented into various different vmas. It is okay to mmap()
4980            and munmap() stuff in this slot after doing this call at any time */
4981
4982         if (mem->userspace_addr & 0xffffful)
4983                 return -EINVAL;
4984
4985         if (mem->memory_size & 0xffffful)
4986                 return -EINVAL;
4987
4988         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
4989                 return -EINVAL;
4990
4991         /* When we are protected, we should not change the memory slots */
4992         if (kvm_s390_pv_get_handle(kvm))
4993                 return -EINVAL;
4994         return 0;
4995 }
4996
4997 void kvm_arch_commit_memory_region(struct kvm *kvm,
4998                                 const struct kvm_userspace_memory_region *mem,
4999                                 struct kvm_memory_slot *old,
5000                                 const struct kvm_memory_slot *new,
5001                                 enum kvm_mr_change change)
5002 {
5003         int rc = 0;
5004
5005         switch (change) {
5006         case KVM_MR_DELETE:
5007                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5008                                         old->npages * PAGE_SIZE);
5009                 break;
5010         case KVM_MR_MOVE:
5011                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5012                                         old->npages * PAGE_SIZE);
5013                 if (rc)
5014                         break;
5015                 fallthrough;
5016         case KVM_MR_CREATE:
5017                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5018                                       mem->guest_phys_addr, mem->memory_size);
5019                 break;
5020         case KVM_MR_FLAGS_ONLY:
5021                 break;
5022         default:
5023                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5024         }
5025         if (rc)
5026                 pr_warn("failed to commit memory region\n");
5027         return;
5028 }
5029
5030 static inline unsigned long nonhyp_mask(int i)
5031 {
5032         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5033
5034         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5035 }
5036
5037 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5038 {
5039         vcpu->valid_wakeup = false;
5040 }
5041
5042 static int __init kvm_s390_init(void)
5043 {
5044         int i;
5045
5046         if (!sclp.has_sief2) {
5047                 pr_info("SIE is not available\n");
5048                 return -ENODEV;
5049         }
5050
5051         if (nested && hpage) {
5052                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5053                 return -EINVAL;
5054         }
5055
5056         for (i = 0; i < 16; i++)
5057                 kvm_s390_fac_base[i] |=
5058                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
5059
5060         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5061 }
5062
5063 static void __exit kvm_s390_exit(void)
5064 {
5065         kvm_exit();
5066 }
5067
5068 module_init(kvm_s390_init);
5069 module_exit(kvm_s390_exit);
5070
5071 /*
5072  * Enable autoloading of the kvm module.
5073  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5074  * since x86 takes a different approach.
5075  */
5076 #include <linux/miscdevice.h>
5077 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5078 MODULE_ALIAS("devname:kvm");