// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))
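
/*
 * Worst-case sizing sketch (the concrete numbers depend on the uapi headers
 * in use): assuming KVM_MAX_VCPUS == 255, LOCAL_IRQS == 32 and
 * sizeof(struct kvm_s390_irq) == 64, VCPU_IRQS_MAX_BUF works out to
 * (255 + 32) * 64 = 18368 bytes - roughly one slot per kind of pending
 * local interrupt plus one SIGP per potential sender vcpu.
 */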
struct kvm_stats_debugfs_item debugfs_entries[] = {
	VCPU_STAT("userspace_handled", exit_userspace),
	VCPU_STAT("exit_null", exit_null),
	VCPU_STAT("exit_validity", exit_validity),
	VCPU_STAT("exit_stop_request", exit_stop_request),
	VCPU_STAT("exit_external_request", exit_external_request),
	VCPU_STAT("exit_io_request", exit_io_request),
	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
	VCPU_STAT("exit_instruction", exit_instruction),
	VCPU_STAT("exit_pei", exit_pei),
	VCPU_STAT("exit_program_interruption", exit_program_interruption),
	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
	VCPU_STAT("exit_operation_exception", exit_operation_exception),
	VCPU_STAT("halt_successful_poll", halt_successful_poll),
	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
	VCPU_STAT("halt_wakeup", halt_wakeup),
	VCPU_STAT("instruction_lctlg", instruction_lctlg),
	VCPU_STAT("instruction_lctl", instruction_lctl),
	VCPU_STAT("instruction_stctl", instruction_stctl),
	VCPU_STAT("instruction_stctg", instruction_stctg),
	VCPU_STAT("deliver_ckc", deliver_ckc),
	VCPU_STAT("deliver_cputm", deliver_cputm),
	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
	VCPU_STAT("deliver_external_call", deliver_external_call),
	VCPU_STAT("deliver_service_signal", deliver_service_signal),
	VCPU_STAT("deliver_virtio", deliver_virtio),
	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
	VCPU_STAT("deliver_program", deliver_program),
	VCPU_STAT("deliver_io", deliver_io),
	VCPU_STAT("deliver_machine_check", deliver_machine_check),
	VCPU_STAT("exit_wait_state", exit_wait_state),
	VCPU_STAT("inject_ckc", inject_ckc),
	VCPU_STAT("inject_cputm", inject_cputm),
	VCPU_STAT("inject_external_call", inject_external_call),
	VM_STAT("inject_float_mchk", inject_float_mchk),
	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
	VM_STAT("inject_io", inject_io),
	VCPU_STAT("inject_mchk", inject_mchk),
	VM_STAT("inject_pfault_done", inject_pfault_done),
	VCPU_STAT("inject_program", inject_program),
	VCPU_STAT("inject_restart", inject_restart),
	VM_STAT("inject_service_signal", inject_service_signal),
	VCPU_STAT("inject_set_prefix", inject_set_prefix),
	VCPU_STAT("inject_stop_signal", inject_stop_signal),
	VCPU_STAT("inject_pfault_init", inject_pfault_init),
	VM_STAT("inject_virtio", inject_virtio),
	VCPU_STAT("instruction_epsw", instruction_epsw),
	VCPU_STAT("instruction_gs", instruction_gs),
	VCPU_STAT("instruction_io_other", instruction_io_other),
	VCPU_STAT("instruction_lpsw", instruction_lpsw),
	VCPU_STAT("instruction_lpswe", instruction_lpswe),
	VCPU_STAT("instruction_pfmf", instruction_pfmf),
	VCPU_STAT("instruction_ptff", instruction_ptff),
	VCPU_STAT("instruction_stidp", instruction_stidp),
	VCPU_STAT("instruction_sck", instruction_sck),
	VCPU_STAT("instruction_sckpf", instruction_sckpf),
	VCPU_STAT("instruction_spx", instruction_spx),
	VCPU_STAT("instruction_stpx", instruction_stpx),
	VCPU_STAT("instruction_stap", instruction_stap),
	VCPU_STAT("instruction_iske", instruction_iske),
	VCPU_STAT("instruction_ri", instruction_ri),
	VCPU_STAT("instruction_rrbe", instruction_rrbe),
	VCPU_STAT("instruction_sske", instruction_sske),
	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
	VCPU_STAT("instruction_essa", instruction_essa),
	VCPU_STAT("instruction_stsi", instruction_stsi),
	VCPU_STAT("instruction_stfl", instruction_stfl),
	VCPU_STAT("instruction_tb", instruction_tb),
	VCPU_STAT("instruction_tpi", instruction_tpi),
	VCPU_STAT("instruction_tprot", instruction_tprot),
	VCPU_STAT("instruction_tsch", instruction_tsch),
	VCPU_STAT("instruction_sthyi", instruction_sthyi),
	VCPU_STAT("instruction_sie", instruction_sie),
	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
	VCPU_STAT("instruction_diag_10", diagnose_10),
	VCPU_STAT("instruction_diag_44", diagnose_44),
	VCPU_STAT("instruction_diag_9c", diagnose_9c),
	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
	VCPU_STAT("instruction_diag_258", diagnose_258),
	VCPU_STAT("instruction_diag_308", diagnose_308),
	VCPU_STAT("instruction_diag_500", diagnose_500),
	VCPU_STAT("instruction_diag_other", diagnose_other),
	{ NULL }
};
struct kvm_s390_tod_clock_ext {
	__u8 epoch_idx;
	__u64 tod;
	__u8 reserved[7];
} __packed;
/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling. >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
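
/*
 * Usage sketch: nested and hpage are read-only at runtime (S_IRUGO/0444)
 * and must be given at load time, while the 0644 parameters can be changed
 * on the fly, e.g.:
 *
 *	modprobe kvm nested=1
 *	echo 25 > /sys/module/kvm/parameters/halt_poll_max_steal
 *	echo 0 > /sys/module/kvm/parameters/use_gisa
 */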
/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		sizeof(S390_lowcore.stfle_fac_list));

	return SIZE_INTERNAL;
}
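
/*
 * Layout sketch: the facility list is a big-endian bit string, so facility
 * bit n lives in byte n / 8 under mask 0x80 >> (n % 8); facility 129
 * (vector), for example, sits in byte 16 under mask 0x40. With
 * SIZE_INTERNAL == 16 doublewords the masks above cover facilities 0-1023.
 */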
/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}
/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}
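
/*
 * Worked example: if the host TOD is stepped forward by delta = 0x100, the
 * epoch above is decreased by 0x100 so that (TOD + epoch), the guest's view
 * of the clock, stays constant. With the multiple-epoch facility (ECD_MEF),
 * epdx extends the epoch to, in effect, a signed 72-bit value, so a borrow
 * out of the 64-bit addition has to be propagated into epdx - that is what
 * the delta_idx sign extension and the (scb->epoch < delta) carry check do.
 */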
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};
int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}
void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}
static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc;

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	register unsigned long r0 asm("0") = 0;	/* query function */
	register unsigned long r1 asm("1") = (unsigned long) query;

	asm volatile(
		/* Parameter regs are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: "d" (r0), "a" (r1), [opc] "i" (opcode)
		: "cc", "memory");
}
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}
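
/*
 * Userspace view (sketch, error handling omitted): the bits accumulated in
 * kvm_s390_available_cpu_feat above are what a KVM_GET_DEVICE_ATTR call on
 * the VM fd reports back for the machine feature set:
 *
 *	struct kvm_s390_vm_cpu_feat feat;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr = KVM_S390_VM_CPU_MACHINE_FEAT,
 *		.addr = (__u64)&feat,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */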
int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}
void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
		r = 1;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			down_write(&kvm->mm->mmap_sem);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			up_write(&kvm->mm->mmap_sem);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;
	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}
static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	int cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int slotnr;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || !slots->used_slots)
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
		ms = slots->memslots + slotnr;
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
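
/*
 * Userspace trigger (sketch, error handling omitted): migration mode is
 * flipped through the VM device attribute interface and ends up here with
 * kvm->slots_lock held, see kvm_s390_vm_set_migration() below:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */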
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}
static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_s390_tod_clock_ext htod;

	preempt_disable();

	get_tod_clock_ext((char *)&htod);

	gtod->tod = htod.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
		if (gtod->tod < htod.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}
static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	down_read(&current->mm->mmap_sem);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current, current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	up_read(&current->mm->mmap_sem);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
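
/*
 * Arithmetic sketch: on s390x, 2 * sizeof(void *) == 16, so a run of up to
 * 16 clean values is sent inline rather than terminating the block, since
 * starting a fresh block costs a new start_gfn/count header of comparable
 * size in the output stream.
 */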
/*
 * Similar to gfn_to_memslot, but returns the index of a memslot also when the
 * address falls in a hole. In that case the index of one of the memslots
 * bordering the hole is returned.
 */
static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
{
	int start = 0, end = slots->used_slots;
	int slot = atomic_read(&slots->lru_slot);
	struct kvm_memory_slot *memslots = slots->memslots;

	if (gfn >= memslots[slot].base_gfn &&
	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
		return slot;

	while (start < end) {
		slot = start + (end - start) / 2;

		if (gfn >= memslots[slot].base_gfn)
			end = slot;
		else
			start = slot + 1;
	}

	if (start >= slots->used_slots)
		return slots->used_slots - 1;

	if (gfn >= memslots[start].base_gfn &&
	    gfn < memslots[start].base_gfn + memslots[start].npages) {
		atomic_set(&slots->lru_slot, start);
	}

	return start;
}
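
/*
 * Example: with two slots covering gfns [0x0, 0x100) and [0x200, 0x300), a
 * lookup of gfn 0x180 (inside the hole) still terminates and returns the
 * index of one of the two bordering slots; callers such as
 * kvm_s390_next_dirty_cmma() only need a valid starting point, not an exact
 * hit. The lru_slot cache is only updated on an exact hit.
 */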
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}
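
/*
 * Encoding note: (pgstev >> 24) moves the PGSTE status byte into place and
 * the 0x43 mask keeps only the usage-state and NODAT bits (the same bits
 * that kvm_s390_set_cmma_bits() below accepts via _PGSTE_GPS_USAGE_MASK |
 * _PGSTE_GPS_NODAT); everything else in the PGSTE is not exposed.
 */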
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
	struct kvm_memory_slot *ms = slots->memslots + slotidx;
	unsigned long ofs = cur_gfn - ms->base_gfn;

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		slotidx--;
		/* If we are above the highest slot, wrap around */
		if (slotidx < 0)
			slotidx = slots->used_slots - 1;

		ms = slots->memslots + slotidx;
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while ((slotidx > 0) && (ofs >= ms->npages)) {
		slotidx--;
		ms = slots->memslots + slotidx;
		ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
	}
	return ms->base_gfn + ofs;
}
static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	if (unlikely(!slots->used_slots))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}
/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
2058 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2059 struct kvm_s390_cmma_log *args)
2061 unsigned long bufsize;
2062 int srcu_idx, peek, ret;
2063 u8 *values;
2065 if (!kvm->arch.use_cmma)
2066 return -ENXIO;
2067 /* Invalid/unsupported flags were specified */
2068 if (args->flags & ~KVM_S390_CMMA_PEEK)
2069 return -EINVAL;
2070 /* Migration mode query, and we are not doing a migration */
2071 peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2072 if (!peek && !kvm->arch.migration_mode)
2073 return -EINVAL;
2074 /* CMMA is disabled or was not used, or the buffer has length zero */
2075 bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2076 if (!bufsize || !kvm->mm->context.uses_cmm) {
2077 memset(args, 0, sizeof(*args));
2078 return 0;
2079 }
2080 /* We are not peeking, and there are no dirty pages */
2081 if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2082 memset(args, 0, sizeof(*args));
2083 return 0;
2084 }
2086 values = vmalloc(bufsize);
2087 if (!values)
2088 return -ENOMEM;
2090 down_read(&kvm->mm->mmap_sem);
2091 srcu_idx = srcu_read_lock(&kvm->srcu);
2092 if (peek)
2093 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2094 else
2095 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2096 srcu_read_unlock(&kvm->srcu, srcu_idx);
2097 up_read(&kvm->mm->mmap_sem);
2099 if (kvm->arch.migration_mode)
2100 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2101 else
2102 args->remaining = 0;
2104 if (copy_to_user((void __user *)args->values, values, args->count))
2105 ret = -EFAULT;
2107 vfree(values);
2108 return ret;
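/*
 * Illustrative userspace loop for KVM_S390_GET_CMMA_BITS (a sketch, not
 * part of this file; vm_fd and buf are assumptions of the example). The
 * kernel updates start_gfn, count and remaining on every call:
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = KVM_S390_CMMA_SIZE_MAX,
 *		.flags = 0,				// or KVM_S390_CMMA_PEEK
 *		.values = (__u64)(unsigned long)buf,	// count bytes large
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		// consume log.count values for gfns starting at log.start_gfn
 *		log.start_gfn += log.count;
 *		log.count = KVM_S390_CMMA_SIZE_MAX;
 *	} while (log.remaining);
 */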
2112 * This function sets the CMMA attributes for the given pages. If the input
2113 * buffer has zero length, no action is taken, otherwise the attributes are
2114 * set and the mm->context.uses_cmm flag is set.
2116 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2117 const struct kvm_s390_cmma_log *args)
2119 unsigned long hva, mask, pgstev, i;
2120 uint8_t *bits;
2121 int srcu_idx, r = 0;
2123 mask = args->mask;
2125 if (!kvm->arch.use_cmma)
2126 return -ENXIO;
2127 /* invalid/unsupported flags */
2128 if (args->flags != 0)
2129 return -EINVAL;
2130 /* Enforce sane limit on memory allocation */
2131 if (args->count > KVM_S390_CMMA_SIZE_MAX)
2132 return -EINVAL;
2134 if (args->count == 0)
2135 return 0;
2137 bits = vmalloc(array_size(sizeof(*bits), args->count));
2138 if (!bits)
2139 return -ENOMEM;
2141 r = copy_from_user(bits, (void __user *)args->values, args->count);
2142 if (r) {
2143 r = -EFAULT;
2144 goto out;
2145 }
2147 down_read(&kvm->mm->mmap_sem);
2148 srcu_idx = srcu_read_lock(&kvm->srcu);
2149 for (i = 0; i < args->count; i++) {
2150 hva = gfn_to_hva(kvm, args->start_gfn + i);
2151 if (kvm_is_error_hva(hva)) {
2152 r = -EFAULT;
2153 break;
2154 }
2156 pgstev = bits[i];
2157 pgstev = pgstev << 24;
2158 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2159 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2160 }
2161 srcu_read_unlock(&kvm->srcu, srcu_idx);
2162 up_read(&kvm->mm->mmap_sem);
2164 if (!kvm->mm->context.uses_cmm) {
2165 down_write(&kvm->mm->mmap_sem);
2166 kvm->mm->context.uses_cmm = 1;
2167 up_write(&kvm->mm->mmap_sem);
2168 }
2169 out:
2170 vfree(bits);
2171 return r;
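/*
 * kvm_s390_cpus_from_pv() - convert all vcpus back from protected mode.
 * Failures are not fatal for the loop itself (see the comment below);
 * the rc/rrc pair of the first failing CPU is what gets reported back.
 */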
2174 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2176 struct kvm_vcpu *vcpu;
2177 u16 rc, rrc;
2178 int ret = 0;
2179 int i;
2182 * We ignore failures and try to destroy as many CPUs as possible.
2183 * At the same time we must not free the assigned resources when
2184 * this fails, as the ultravisor has still access to that memory.
2185 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2187 * We want to return the first failure rc and rrc, though.
2189 kvm_for_each_vcpu(i, vcpu, kvm) {
2190 mutex_lock(&vcpu->mutex);
2191 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2192 *rcp = rc;
2193 *rrcp = rrc;
2194 ret = -EIO;
2195 }
2196 mutex_unlock(&vcpu->mutex);
2197 }
2198 return ret;
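/*
 * kvm_s390_cpus_to_pv() - create the ultravisor part of every vcpu. If
 * any CPU fails to convert, the ones already converted are rolled back
 * via kvm_s390_cpus_from_pv() and the error is returned.
 */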
2201 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2203 int i, r = 0;
2204 u16 dummy;
2206 struct kvm_vcpu *vcpu;
2208 kvm_for_each_vcpu(i, vcpu, kvm) {
2209 mutex_lock(&vcpu->mutex);
2210 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2211 mutex_unlock(&vcpu->mutex);
2212 if (r)
2213 break;
2214 }
2215 if (r)
2216 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2217 return r;
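/*
 * kvm_s390_handle_pv() - dispatch the KVM_S390_PV_COMMAND subcommands.
 * Called with kvm->lock held (see kvm_arch_vm_ioctl() below); cmd->rc
 * and cmd->rrc carry the ultravisor return codes back to userspace.
 */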
2220 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2224 void __user *argp = (void __user *)cmd->data;
2227 case KVM_PV_ENABLE: {
2229 if (kvm_s390_pv_is_protected(kvm))
2233 * FMT 4 SIE needs esca. As we never switch back to bsca from
2234 * esca, we need no cleanup in the error cases below
2236 r = sca_switch_to_extended(kvm);
2240 down_write(&current->mm->mmap_sem);
2241 r = gmap_mark_unmergeable();
2242 up_write(&current->mm->mmap_sem);
2246 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2250 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2252 kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2254 /* we need to block service interrupts from now on */
2255 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2258 case KVM_PV_DISABLE: {
2260 if (!kvm_s390_pv_is_protected(kvm))
2263 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2265 * If a CPU could not be destroyed, destroy VM will also fail.
2266 * There is no point in trying to destroy it. Instead return
2267 * the rc and rrc from the first CPU that failed destroying.
2271 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2273 /* no need to block service interrupts any more */
2274 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2277 case KVM_PV_SET_SEC_PARMS: {
2278 struct kvm_s390_pv_sec_parm parms = {};
2282 if (!kvm_s390_pv_is_protected(kvm))
2286 if (copy_from_user(&parms, argp, sizeof(parms)))
2289 /* Currently restricted to 8KB */
2291 if (parms.length > PAGE_SIZE * 2)
2295 hdr = vmalloc(parms.length);
2300 if (!copy_from_user(hdr, (void __user *)parms.origin,
2302 r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2303 &cmd->rc, &cmd->rrc);
2308 case KVM_PV_UNPACK: {
2309 struct kvm_s390_pv_unp unp = {};
2312 if (!kvm_s390_pv_is_protected(kvm))
2316 if (copy_from_user(&unp, argp, sizeof(unp)))
2319 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2320 &cmd->rc, &cmd->rrc);
2323 case KVM_PV_VERIFY: {
2325 if (!kvm_s390_pv_is_protected(kvm))
2328 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2329 UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2330 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2331 cmd->rrc);
2332 break;
2334 case KVM_PV_PREP_RESET: {
2336 if (!kvm_s390_pv_is_protected(kvm))
2339 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2340 UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2341 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2342 cmd->rc, cmd->rrc);
2343 break;
2345 case KVM_PV_UNSHARE_ALL: {
2347 if (!kvm_s390_pv_is_protected(kvm))
2350 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351 UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2352 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2353 cmd->rc, cmd->rrc);
2354 break;
2355 }
2356 default:
2357 r = -ENOTTY;
2358 }
2360 return r;
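/*
 * For orientation, the expected userspace ordering of the subcommands
 * handled above (a sketch; vm_fd is an assumption of the example):
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // donate memory, convert cpus
 *	// then KVM_PV_SET_SEC_PARMS, KVM_PV_UNPACK and KVM_PV_VERIFY to
 *	// stage and check the encrypted boot image, and eventually:
 *	cmd.cmd = KVM_PV_DISABLE;
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // convert back, free handle
 */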
2362 long kvm_arch_vm_ioctl(struct file *filp,
2363 unsigned int ioctl, unsigned long arg)
2365 struct kvm *kvm = filp->private_data;
2366 void __user *argp = (void __user *)arg;
2367 struct kvm_device_attr attr;
2371 case KVM_S390_INTERRUPT: {
2372 struct kvm_s390_interrupt s390int;
2375 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2377 r = kvm_s390_inject_vm(kvm, &s390int);
2380 case KVM_CREATE_IRQCHIP: {
2381 struct kvm_irq_routing_entry routing;
2384 if (kvm->arch.use_irqchip) {
2385 /* Set up dummy routing. */
2386 memset(&routing, 0, sizeof(routing));
2387 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2391 case KVM_SET_DEVICE_ATTR: {
2393 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2395 r = kvm_s390_vm_set_attr(kvm, &attr);
2398 case KVM_GET_DEVICE_ATTR: {
2400 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2402 r = kvm_s390_vm_get_attr(kvm, &attr);
2405 case KVM_HAS_DEVICE_ATTR: {
2407 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2409 r = kvm_s390_vm_has_attr(kvm, &attr);
2412 case KVM_S390_GET_SKEYS: {
2413 struct kvm_s390_skeys args;
2416 if (copy_from_user(&args, argp,
2417 sizeof(struct kvm_s390_skeys)))
2419 r = kvm_s390_get_skeys(kvm, &args);
2422 case KVM_S390_SET_SKEYS: {
2423 struct kvm_s390_skeys args;
2426 if (copy_from_user(&args, argp,
2427 sizeof(struct kvm_s390_skeys)))
2429 r = kvm_s390_set_skeys(kvm, &args);
2432 case KVM_S390_GET_CMMA_BITS: {
2433 struct kvm_s390_cmma_log args;
2436 if (copy_from_user(&args, argp, sizeof(args)))
2438 mutex_lock(&kvm->slots_lock);
2439 r = kvm_s390_get_cmma_bits(kvm, &args);
2440 mutex_unlock(&kvm->slots_lock);
2442 r = copy_to_user(argp, &args, sizeof(args));
2448 case KVM_S390_SET_CMMA_BITS: {
2449 struct kvm_s390_cmma_log args;
2452 if (copy_from_user(&args, argp, sizeof(args)))
2454 mutex_lock(&kvm->slots_lock);
2455 r = kvm_s390_set_cmma_bits(kvm, &args);
2456 mutex_unlock(&kvm->slots_lock);
2459 case KVM_S390_PV_COMMAND: {
2460 struct kvm_pv_cmd args;
2462 /* protvirt means user sigp */
2463 kvm->arch.user_cpu_state_ctrl = 1;
2465 if (!is_prot_virt_host()) {
2469 if (copy_from_user(&args, argp, sizeof(args))) {
2477 mutex_lock(&kvm->lock);
2478 r = kvm_s390_handle_pv(kvm, &args);
2479 mutex_unlock(&kvm->lock);
2480 if (copy_to_user(argp, &args, sizeof(args))) {
2493 static int kvm_s390_apxa_installed(void)
2495 struct ap_config_info info;
2497 if (ap_instructions_available()) {
2498 if (ap_qci(&info) == 0)
2499 return info.apxa;
2500 }
2502 return 0;
2506 * The format of the crypto control block (CRYCB) is specified in the 3 low
2507 * order bits of the CRYCB designation (CRYCBD) field as follows:
2508 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2509 * AP extended addressing (APXA) facility are installed.
2510 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2511 * Format 2: Both the APXA and MSAX3 facilities are installed
2513 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2515 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2517 /* Clear the CRYCB format bits - i.e., set format 0 by default */
2518 kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2520 /* Check whether MSAX3 is installed */
2521 if (!test_kvm_facility(kvm, 76))
2522 return;
2524 if (kvm_s390_apxa_installed())
2525 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2526 else
2527 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2530 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2531 unsigned long *aqm, unsigned long *adm)
2533 struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2535 mutex_lock(&kvm->lock);
2536 kvm_s390_vcpu_block_all(kvm);
2538 switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2539 case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2540 memcpy(crycb->apcb1.apm, apm, 32);
2541 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2542 apm[0], apm[1], apm[2], apm[3]);
2543 memcpy(crycb->apcb1.aqm, aqm, 32);
2544 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2545 aqm[0], aqm[1], aqm[2], aqm[3]);
2546 memcpy(crycb->apcb1.adm, adm, 32);
2547 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2548 adm[0], adm[1], adm[2], adm[3]);
2549 break;
2550 case CRYCB_FORMAT1:
2551 case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2552 memcpy(crycb->apcb0.apm, apm, 8);
2553 memcpy(crycb->apcb0.aqm, aqm, 2);
2554 memcpy(crycb->apcb0.adm, adm, 2);
2555 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2556 apm[0], *((unsigned short *)aqm),
2557 *((unsigned short *)adm));
2558 break;
2559 default: /* Cannot happen */
2560 break;
2561 }
2563 /* recreate the shadow crycb for each vcpu */
2564 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2565 kvm_s390_vcpu_unblock_all(kvm);
2566 mutex_unlock(&kvm->lock);
2568 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2570 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2572 mutex_lock(&kvm->lock);
2573 kvm_s390_vcpu_block_all(kvm);
2575 memset(&kvm->arch.crypto.crycb->apcb0, 0,
2576 sizeof(kvm->arch.crypto.crycb->apcb0));
2577 memset(&kvm->arch.crypto.crycb->apcb1, 0,
2578 sizeof(kvm->arch.crypto.crycb->apcb1));
2580 VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2581 /* recreate the shadow crycb for each vcpu */
2582 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2583 kvm_s390_vcpu_unblock_all(kvm);
2584 mutex_unlock(&kvm->lock);
2586 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2588 static u64 kvm_s390_get_initial_cpuid(void)
2590 struct cpuid cpuid;
2592 get_cpu_id(&cpuid);
2593 cpuid.version = 0xff;
2594 return *((u64 *) &cpuid);
2597 static void kvm_s390_crypto_init(struct kvm *kvm)
2599 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2600 kvm_s390_set_crycb_format(kvm);
2602 if (!test_kvm_facility(kvm, 76))
2605 /* Enable AES/DEA protected key functions by default */
2606 kvm->arch.crypto.aes_kw = 1;
2607 kvm->arch.crypto.dea_kw = 1;
2608 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2609 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2610 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2611 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2614 static void sca_dispose(struct kvm *kvm)
2616 if (kvm->arch.use_esca)
2617 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2618 else
2619 free_page((unsigned long)(kvm->arch.sca));
2620 kvm->arch.sca = NULL;
2623 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2625 gfp_t alloc_flags = GFP_KERNEL;
2627 char debug_name[16];
2628 static unsigned long sca_offset;
2631 #ifdef CONFIG_KVM_S390_UCONTROL
2632 if (type & ~KVM_VM_S390_UCONTROL)
2633 return -EINVAL;
2634 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2635 return -EPERM;
2641 rc = s390_enable_sie();
2647 if (!sclp.has_64bscao)
2648 alloc_flags |= GFP_DMA;
2649 rwlock_init(&kvm->arch.sca_lock);
2650 /* start with basic SCA */
2651 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2654 mutex_lock(&kvm_lock);
2655 sca_offset += 16;
2656 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2657 sca_offset = 0;
2658 kvm->arch.sca = (struct bsca_block *)
2659 ((char *) kvm->arch.sca + sca_offset);
2660 mutex_unlock(&kvm_lock);
2662 sprintf(debug_name, "kvm-%u", current->pid);
2664 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2668 BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2669 kvm->arch.sie_page2 =
2670 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
2671 if (!kvm->arch.sie_page2)
2674 kvm->arch.sie_page2->kvm = kvm;
2675 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2677 for (i = 0; i < kvm_s390_fac_size(); i++) {
2678 kvm->arch.model.fac_mask[i] = S390_lowcore.stfle_fac_list[i] &
2679 (kvm_s390_fac_base[i] |
2680 kvm_s390_fac_ext[i]);
2681 kvm->arch.model.fac_list[i] = S390_lowcore.stfle_fac_list[i] &
2682 kvm_s390_fac_base[i];
2684 kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2686 /* we are always in czam mode - even on pre z14 machines */
2687 set_kvm_facility(kvm->arch.model.fac_mask, 138);
2688 set_kvm_facility(kvm->arch.model.fac_list, 138);
2689 /* we emulate STHYI in kvm */
2690 set_kvm_facility(kvm->arch.model.fac_mask, 74);
2691 set_kvm_facility(kvm->arch.model.fac_list, 74);
2692 if (MACHINE_HAS_TLB_GUEST) {
2693 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2694 set_kvm_facility(kvm->arch.model.fac_list, 147);
2697 if (css_general_characteristics.aiv && test_facility(65))
2698 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2700 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2701 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2703 kvm_s390_crypto_init(kvm);
2705 mutex_init(&kvm->arch.float_int.ais_lock);
2706 spin_lock_init(&kvm->arch.float_int.lock);
2707 for (i = 0; i < FIRQ_LIST_COUNT; i++)
2708 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2709 init_waitqueue_head(&kvm->arch.ipte_wq);
2710 mutex_init(&kvm->arch.ipte_mutex);
2712 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2713 VM_EVENT(kvm, 3, "vm created with type %lu", type);
2715 if (type & KVM_VM_S390_UCONTROL) {
2716 kvm->arch.gmap = NULL;
2717 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2719 if (sclp.hamax == U64_MAX)
2720 kvm->arch.mem_limit = TASK_SIZE_MAX;
2721 else
2722 kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2723 sclp.hamax + 1);
2724 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2725 if (!kvm->arch.gmap)
2727 kvm->arch.gmap->private = kvm;
2728 kvm->arch.gmap->pfault_enabled = 0;
2731 kvm->arch.use_pfmfi = sclp.has_pfmfi;
2732 kvm->arch.use_skf = sclp.has_skey;
2733 spin_lock_init(&kvm->arch.start_stop_lock);
2734 kvm_s390_vsie_init(kvm);
2736 kvm_s390_gisa_init(kvm);
2737 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2741 free_page((unsigned long)kvm->arch.sie_page2);
2742 debug_unregister(kvm->arch.dbf);
2744 KVM_EVENT(3, "creation of vm failed: %d", rc);
2748 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2752 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2753 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2754 kvm_s390_clear_local_irqs(vcpu);
2755 kvm_clear_async_pf_completion_queue(vcpu);
2756 if (!kvm_is_ucontrol(vcpu->kvm))
2757 sca_del_vcpu(vcpu);
2759 if (kvm_is_ucontrol(vcpu->kvm))
2760 gmap_remove(vcpu->arch.gmap);
2762 if (vcpu->kvm->arch.use_cmma)
2763 kvm_s390_vcpu_unsetup_cmma(vcpu);
2764 /* We cannot hold the vcpu mutex here; we are already dying */
2765 if (kvm_s390_pv_cpu_get_handle(vcpu))
2766 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2767 free_page((unsigned long)(vcpu->arch.sie_block));
2770 static void kvm_free_vcpus(struct kvm *kvm)
2773 struct kvm_vcpu *vcpu;
2775 kvm_for_each_vcpu(i, vcpu, kvm)
2776 kvm_vcpu_destroy(vcpu);
2778 mutex_lock(&kvm->lock);
2779 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2780 kvm->vcpus[i] = NULL;
2782 atomic_set(&kvm->online_vcpus, 0);
2783 mutex_unlock(&kvm->lock);
2786 void kvm_arch_destroy_vm(struct kvm *kvm)
2790 kvm_free_vcpus(kvm);
2792 kvm_s390_gisa_destroy(kvm);
2794 * We are already at the end of life and kvm->lock is not taken.
2795 * This is ok as the file descriptor is closed by now and nobody
2796 * can mess with the pv state. To avoid lockdep_assert_held from
2797 * complaining we do not use kvm_s390_pv_is_protected.
2799 if (kvm_s390_pv_get_handle(kvm))
2800 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2801 debug_unregister(kvm->arch.dbf);
2802 free_page((unsigned long)kvm->arch.sie_page2);
2803 if (!kvm_is_ucontrol(kvm))
2804 gmap_remove(kvm->arch.gmap);
2805 kvm_s390_destroy_adapters(kvm);
2806 kvm_s390_clear_float_irqs(kvm);
2807 kvm_s390_vsie_destroy(kvm);
2808 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2811 /* Section: vcpu related */
2812 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2814 vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2815 if (!vcpu->arch.gmap)
2817 vcpu->arch.gmap->private = vcpu->kvm;
2822 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2824 if (!kvm_s390_use_sca_entries())
2826 read_lock(&vcpu->kvm->arch.sca_lock);
2827 if (vcpu->kvm->arch.use_esca) {
2828 struct esca_block *sca = vcpu->kvm->arch.sca;
2830 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2831 sca->cpu[vcpu->vcpu_id].sda = 0;
2833 struct bsca_block *sca = vcpu->kvm->arch.sca;
2835 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2836 sca->cpu[vcpu->vcpu_id].sda = 0;
2838 read_unlock(&vcpu->kvm->arch.sca_lock);
2841 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2843 if (!kvm_s390_use_sca_entries()) {
2844 struct bsca_block *sca = vcpu->kvm->arch.sca;
2846 /* we still need the basic sca for the ipte control */
2847 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2848 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2851 read_lock(&vcpu->kvm->arch.sca_lock);
2852 if (vcpu->kvm->arch.use_esca) {
2853 struct esca_block *sca = vcpu->kvm->arch.sca;
2855 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2856 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2857 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2858 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2859 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2861 struct bsca_block *sca = vcpu->kvm->arch.sca;
2863 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2864 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2865 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2866 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2868 read_unlock(&vcpu->kvm->arch.sca_lock);
2871 /* Basic SCA to Extended SCA data copy routines */
2872 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2875 d->sigp_ctrl.c = s->sigp_ctrl.c;
2876 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2879 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2883 d->ipte_control = s->ipte_control;
2885 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2886 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2889 static int sca_switch_to_extended(struct kvm *kvm)
2891 struct bsca_block *old_sca = kvm->arch.sca;
2892 struct esca_block *new_sca;
2893 struct kvm_vcpu *vcpu;
2894 unsigned int vcpu_idx;
2897 if (kvm->arch.use_esca)
2900 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2904 scaoh = (u32)((u64)(new_sca) >> 32);
2905 scaol = (u32)(u64)(new_sca) & ~0x3fU;
2907 kvm_s390_vcpu_block_all(kvm);
2908 write_lock(&kvm->arch.sca_lock);
2910 sca_copy_b_to_e(new_sca, old_sca);
2912 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2913 vcpu->arch.sie_block->scaoh = scaoh;
2914 vcpu->arch.sie_block->scaol = scaol;
2915 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2917 kvm->arch.sca = new_sca;
2918 kvm->arch.use_esca = 1;
2920 write_unlock(&kvm->arch.sca_lock);
2921 kvm_s390_vcpu_unblock_all(kvm);
2923 free_page((unsigned long)old_sca);
2925 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2926 old_sca, kvm->arch.sca);
2928 return 0;
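/*
 * sca_can_add_vcpu() - check whether vcpu @id fits into the SCA,
 * upgrading a basic SCA to the extended format on demand when the id
 * does not fit and the machine offers ESCA and the 64-bit SCA origin.
 */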
2930 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2934 if (!kvm_s390_use_sca_entries()) {
2935 if (id < KVM_MAX_VCPUS)
2939 if (id < KVM_S390_BSCA_CPU_SLOTS)
2941 if (!sclp.has_esca || !sclp.has_64bscao)
2944 mutex_lock(&kvm->lock);
2945 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2946 mutex_unlock(&kvm->lock);
2948 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2951 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2952 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2954 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2955 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2956 vcpu->arch.cputm_start = get_tod_clock_fast();
2957 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2960 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2961 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2963 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2964 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2965 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2966 vcpu->arch.cputm_start = 0;
2967 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2970 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2971 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2973 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2974 vcpu->arch.cputm_enabled = true;
2975 __start_cpu_timer_accounting(vcpu);
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2979 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2981 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2982 __stop_cpu_timer_accounting(vcpu);
2983 vcpu->arch.cputm_enabled = false;
2986 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2988 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2989 __enable_cpu_timer_accounting(vcpu);
2990 preempt_enable();
2993 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2995 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2996 __disable_cpu_timer_accounting(vcpu);
2997 preempt_enable();
3000 /* set the cpu timer - may only be called from the VCPU thread itself */
3001 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3003 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3004 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3005 if (vcpu->arch.cputm_enabled)
3006 vcpu->arch.cputm_start = get_tod_clock_fast();
3007 vcpu->arch.sie_block->cputm = cputm;
3008 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3009 preempt_enable();
3012 /* update and get the cpu timer - can also be called from other VCPU threads */
3013 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3018 if (unlikely(!vcpu->arch.cputm_enabled))
3019 return vcpu->arch.sie_block->cputm;
3021 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3023 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3025 * If the writer would ever execute a read in the critical
3026 * section, e.g. in irq context, we have a deadlock.
3028 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3029 value = vcpu->arch.sie_block->cputm;
3030 /* if cputm_start is 0, accounting is being started/stopped */
3031 if (likely(vcpu->arch.cputm_start))
3032 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3033 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3034 preempt_enable();
3035 return value;
3038 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3041 gmap_enable(vcpu->arch.enabled_gmap);
3042 kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3043 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3044 __start_cpu_timer_accounting(vcpu);
3048 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3051 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3052 __stop_cpu_timer_accounting(vcpu);
3053 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3054 vcpu->arch.enabled_gmap = gmap_get_enabled();
3055 gmap_disable(vcpu->arch.enabled_gmap);
3059 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3061 mutex_lock(&vcpu->kvm->lock);
3063 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3064 vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3066 mutex_unlock(&vcpu->kvm->lock);
3067 if (!kvm_is_ucontrol(vcpu->kvm)) {
3068 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3071 if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3072 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3073 /* make vcpu_load load the right gmap on the first trigger */
3074 vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3077 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3079 if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3080 test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3085 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3087 /* At least one ECC subfunction must be present */
3088 return kvm_has_pckmo_subfunc(kvm, 32) ||
3089 kvm_has_pckmo_subfunc(kvm, 33) ||
3090 kvm_has_pckmo_subfunc(kvm, 34) ||
3091 kvm_has_pckmo_subfunc(kvm, 40) ||
3092 kvm_has_pckmo_subfunc(kvm, 41);
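/*
 * Note: the subfunction numbers tested above are the PCKMO function
 * codes for the ECC key types (presumably ECC-P256/P384/P521 and
 * Ed25519/Ed448); the exact mapping is an assumption of this comment.
 */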
3096 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3099 * If the AP instructions are not being interpreted and the MSAX3
3100 * facility is not configured for the guest, there is nothing to set up.
3102 if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3105 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3106 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3107 vcpu->arch.sie_block->eca &= ~ECA_APIE;
3108 vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3110 if (vcpu->kvm->arch.crypto.apie)
3111 vcpu->arch.sie_block->eca |= ECA_APIE;
3113 /* Set up protected key support */
3114 if (vcpu->kvm->arch.crypto.aes_kw) {
3115 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3116 /* ecc is also wrapped with AES key */
3117 if (kvm_has_pckmo_ecc(vcpu->kvm))
3118 vcpu->arch.sie_block->ecd |= ECD_ECC;
3121 if (vcpu->kvm->arch.crypto.dea_kw)
3122 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3125 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3127 free_page(vcpu->arch.sie_block->cbrlo);
3128 vcpu->arch.sie_block->cbrlo = 0;
3131 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3133 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
3134 if (!vcpu->arch.sie_block->cbrlo)
3139 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3141 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3143 vcpu->arch.sie_block->ibc = model->ibc;
3144 if (test_kvm_facility(vcpu->kvm, 7))
3145 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3148 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3153 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3157 if (test_kvm_facility(vcpu->kvm, 78))
3158 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3159 else if (test_kvm_facility(vcpu->kvm, 8))
3160 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3162 kvm_s390_vcpu_setup_model(vcpu);
3164 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3165 if (MACHINE_HAS_ESOP)
3166 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3167 if (test_kvm_facility(vcpu->kvm, 9))
3168 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3169 if (test_kvm_facility(vcpu->kvm, 73))
3170 vcpu->arch.sie_block->ecb |= ECB_TE;
3172 if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3173 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3174 if (test_kvm_facility(vcpu->kvm, 130))
3175 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3176 vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3178 vcpu->arch.sie_block->eca |= ECA_CEI;
3180 vcpu->arch.sie_block->eca |= ECA_IB;
3182 vcpu->arch.sie_block->eca |= ECA_SII;
3183 if (sclp.has_sigpif)
3184 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3185 if (test_kvm_facility(vcpu->kvm, 129)) {
3186 vcpu->arch.sie_block->eca |= ECA_VX;
3187 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3189 if (test_kvm_facility(vcpu->kvm, 139))
3190 vcpu->arch.sie_block->ecd |= ECD_MEF;
3191 if (test_kvm_facility(vcpu->kvm, 156))
3192 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3193 if (vcpu->arch.sie_block->gd) {
3194 vcpu->arch.sie_block->eca |= ECA_AIV;
3195 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3196 vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3198 vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3200 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3203 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3205 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3207 if (vcpu->kvm->arch.use_cmma) {
3208 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3212 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3213 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3215 vcpu->arch.sie_block->hpid = HPID_KVM;
3217 kvm_s390_vcpu_crypto_setup(vcpu);
3219 mutex_lock(&vcpu->kvm->lock);
3220 if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3221 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3223 kvm_s390_vcpu_unsetup_cmma(vcpu);
3225 mutex_unlock(&vcpu->kvm->lock);
3230 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3232 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3237 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3239 struct sie_page *sie_page;
3242 BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3243 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
3247 vcpu->arch.sie_block = &sie_page->sie_block;
3248 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3250 /* the real guest size will always be smaller than msl */
3251 vcpu->arch.sie_block->mso = 0;
3252 vcpu->arch.sie_block->msl = sclp.hamax;
3254 vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3255 spin_lock_init(&vcpu->arch.local_int.lock);
3256 vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3257 if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3258 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3259 seqcount_init(&vcpu->arch.cputm_seqcount);
3261 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3262 kvm_clear_async_pf_completion_queue(vcpu);
3263 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3269 kvm_s390_set_prefix(vcpu, 0);
3270 if (test_kvm_facility(vcpu->kvm, 64))
3271 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3272 if (test_kvm_facility(vcpu->kvm, 82))
3273 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3274 if (test_kvm_facility(vcpu->kvm, 133))
3275 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3276 if (test_kvm_facility(vcpu->kvm, 156))
3277 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3278 /* fprs can be synchronized via vrs, even if the guest has no vx. With
3279 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3282 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3284 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3286 if (kvm_is_ucontrol(vcpu->kvm)) {
3287 rc = __kvm_ucontrol_vcpu_init(vcpu);
3289 goto out_free_sie_block;
3292 VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3293 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3294 trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3296 rc = kvm_s390_vcpu_setup(vcpu);
3298 goto out_ucontrol_uninit;
3301 out_ucontrol_uninit:
3302 if (kvm_is_ucontrol(vcpu->kvm))
3303 gmap_remove(vcpu->arch.gmap);
3305 free_page((unsigned long)(vcpu->arch.sie_block));
3309 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3311 return kvm_s390_vcpu_has_irq(vcpu, 0);
3314 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3316 return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3319 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3321 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3325 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3327 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3330 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3332 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3336 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3338 return atomic_read(&vcpu->arch.sie_block->prog20) &
3339 (PROG_BLOCK_SIE | PROG_REQUEST);
3342 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3344 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3348 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3349 * If the CPU is not running (e.g. waiting as idle) the function will
3350 * return immediately. */
3351 void exit_sie(struct kvm_vcpu *vcpu)
3353 kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3354 kvm_s390_vsie_kick(vcpu);
3355 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3359 /* Kick a guest cpu out of SIE to process a request synchronously */
3360 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3362 kvm_make_request(req, vcpu);
3363 kvm_s390_vcpu_request(vcpu);
3366 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3369 struct kvm *kvm = gmap->private;
3370 struct kvm_vcpu *vcpu;
3371 unsigned long prefix;
3374 if (gmap_is_shadow(gmap))
3376 if (start >= 1UL << 31)
3377 /* We are only interested in prefix pages */
3379 kvm_for_each_vcpu(i, vcpu, kvm) {
3380 /* match against both prefix pages */
3381 prefix = kvm_s390_get_prefix(vcpu);
3382 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3383 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3385 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3390 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3392 /* do not poll with more than halt_poll_max_steal percent of steal time */
3393 if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3394 halt_poll_max_steal) {
3395 vcpu->stat.halt_no_poll_steal++;
3401 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3403 /* kvm common code refers to this, but never calls it */
3408 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3409 struct kvm_one_reg *reg)
3414 case KVM_REG_S390_TODPR:
3415 r = put_user(vcpu->arch.sie_block->todpr,
3416 (u32 __user *)reg->addr);
3418 case KVM_REG_S390_EPOCHDIFF:
3419 r = put_user(vcpu->arch.sie_block->epoch,
3420 (u64 __user *)reg->addr);
3422 case KVM_REG_S390_CPU_TIMER:
3423 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3424 (u64 __user *)reg->addr);
3426 case KVM_REG_S390_CLOCK_COMP:
3427 r = put_user(vcpu->arch.sie_block->ckc,
3428 (u64 __user *)reg->addr);
3430 case KVM_REG_S390_PFTOKEN:
3431 r = put_user(vcpu->arch.pfault_token,
3432 (u64 __user *)reg->addr);
3434 case KVM_REG_S390_PFCOMPARE:
3435 r = put_user(vcpu->arch.pfault_compare,
3436 (u64 __user *)reg->addr);
3438 case KVM_REG_S390_PFSELECT:
3439 r = put_user(vcpu->arch.pfault_select,
3440 (u64 __user *)reg->addr);
3442 case KVM_REG_S390_PP:
3443 r = put_user(vcpu->arch.sie_block->pp,
3444 (u64 __user *)reg->addr);
3446 case KVM_REG_S390_GBEA:
3447 r = put_user(vcpu->arch.sie_block->gbea,
3448 (u64 __user *)reg->addr);
3457 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3458 struct kvm_one_reg *reg)
3464 case KVM_REG_S390_TODPR:
3465 r = get_user(vcpu->arch.sie_block->todpr,
3466 (u32 __user *)reg->addr);
3468 case KVM_REG_S390_EPOCHDIFF:
3469 r = get_user(vcpu->arch.sie_block->epoch,
3470 (u64 __user *)reg->addr);
3472 case KVM_REG_S390_CPU_TIMER:
3473 r = get_user(val, (u64 __user *)reg->addr);
3475 kvm_s390_set_cpu_timer(vcpu, val);
3477 case KVM_REG_S390_CLOCK_COMP:
3478 r = get_user(vcpu->arch.sie_block->ckc,
3479 (u64 __user *)reg->addr);
3481 case KVM_REG_S390_PFTOKEN:
3482 r = get_user(vcpu->arch.pfault_token,
3483 (u64 __user *)reg->addr);
3484 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3485 kvm_clear_async_pf_completion_queue(vcpu);
3487 case KVM_REG_S390_PFCOMPARE:
3488 r = get_user(vcpu->arch.pfault_compare,
3489 (u64 __user *)reg->addr);
3491 case KVM_REG_S390_PFSELECT:
3492 r = get_user(vcpu->arch.pfault_select,
3493 (u64 __user *)reg->addr);
3495 case KVM_REG_S390_PP:
3496 r = get_user(vcpu->arch.sie_block->pp,
3497 (u64 __user *)reg->addr);
3499 case KVM_REG_S390_GBEA:
3500 r = get_user(vcpu->arch.sie_block->gbea,
3501 (u64 __user *)reg->addr);
3510 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3512 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3513 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3514 memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3516 kvm_clear_async_pf_completion_queue(vcpu);
3517 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3518 kvm_s390_vcpu_stop(vcpu);
3519 kvm_s390_clear_local_irqs(vcpu);
3522 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3524 /* Initial reset is a superset of the normal reset */
3525 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3528 * This equals initial cpu reset in pop, but we don't switch to ESA.
3529 * We do not only reset the internal data, but also ...
3531 vcpu->arch.sie_block->gpsw.mask = 0;
3532 vcpu->arch.sie_block->gpsw.addr = 0;
3533 kvm_s390_set_prefix(vcpu, 0);
3534 kvm_s390_set_cpu_timer(vcpu, 0);
3535 vcpu->arch.sie_block->ckc = 0;
3536 memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3537 vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3538 vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3540 /* ... the data in sync regs */
3541 memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3542 vcpu->run->s.regs.ckc = 0;
3543 vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3544 vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3545 vcpu->run->psw_addr = 0;
3546 vcpu->run->psw_mask = 0;
3547 vcpu->run->s.regs.todpr = 0;
3548 vcpu->run->s.regs.cputm = 0;
3549 vcpu->run->s.regs.ckc = 0;
3550 vcpu->run->s.regs.pp = 0;
3551 vcpu->run->s.regs.gbea = 1;
3552 vcpu->run->s.regs.fpc = 0;
3554 * Do not reset these registers in the protected case, as some of
3555 * them are overlayed and they are not accessible in this case
3558 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3559 vcpu->arch.sie_block->gbea = 1;
3560 vcpu->arch.sie_block->pp = 0;
3561 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3562 vcpu->arch.sie_block->todpr = 0;
3566 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3568 struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3570 /* Clear reset is a superset of the initial reset */
3571 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3573 memset(&regs->gprs, 0, sizeof(regs->gprs));
3574 memset(&regs->vrs, 0, sizeof(regs->vrs));
3575 memset(&regs->acrs, 0, sizeof(regs->acrs));
3576 memset(&regs->gscb, 0, sizeof(regs->gscb));
3579 regs->etoken_extension = 0;
3582 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3585 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3590 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3593 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3598 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3599 struct kvm_sregs *sregs)
3603 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3604 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3610 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3611 struct kvm_sregs *sregs)
3615 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3616 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3622 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3628 if (test_fp_ctl(fpu->fpc)) {
3632 vcpu->run->s.regs.fpc = fpu->fpc;
3634 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3635 (freg_t *) fpu->fprs);
3637 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3644 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3648 /* make sure we have the latest values */
3651 convert_vx_to_fp((freg_t *) fpu->fprs,
3652 (__vector128 *) vcpu->run->s.regs.vrs);
3654 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3655 fpu->fpc = vcpu->run->s.regs.fpc;
3661 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3665 if (!is_vcpu_stopped(vcpu))
3668 vcpu->run->psw_mask = psw.mask;
3669 vcpu->run->psw_addr = psw.addr;
3674 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3675 struct kvm_translation *tr)
3677 return -EINVAL; /* not implemented yet */
3680 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3681 KVM_GUESTDBG_USE_HW_BP | \
3682 KVM_GUESTDBG_ENABLE)
3684 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3685 struct kvm_guest_debug *dbg)
3691 vcpu->guest_debug = 0;
3692 kvm_s390_clear_bp_data(vcpu);
3694 if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3698 if (!sclp.has_gpere) {
3703 if (dbg->control & KVM_GUESTDBG_ENABLE) {
3704 vcpu->guest_debug = dbg->control;
3705 /* enforce guest PER */
3706 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3708 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3709 rc = kvm_s390_import_bp_data(vcpu, dbg);
3711 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3712 vcpu->arch.guestdbg.last_bp = 0;
3716 vcpu->guest_debug = 0;
3717 kvm_s390_clear_bp_data(vcpu);
3718 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3726 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3727 struct kvm_mp_state *mp_state)
3733 /* CHECK_STOP and LOAD are not supported yet */
3734 ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3735 KVM_MP_STATE_OPERATING;
3741 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3742 struct kvm_mp_state *mp_state)
3748 /* user space knows about this interface - let it control the state */
3749 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3751 switch (mp_state->mp_state) {
3752 case KVM_MP_STATE_STOPPED:
3753 rc = kvm_s390_vcpu_stop(vcpu);
3755 case KVM_MP_STATE_OPERATING:
3756 rc = kvm_s390_vcpu_start(vcpu);
3758 case KVM_MP_STATE_LOAD:
3759 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3763 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3765 case KVM_MP_STATE_CHECK_STOP:
3766 fallthrough; /* CHECK_STOP and LOAD are not supported yet */
3767 default:
3768 rc = -ENXIO;
3769 }
3771 vcpu_put(vcpu);
3772 return rc;
3775 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3777 return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3780 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3783 kvm_s390_vcpu_request_handled(vcpu);
3784 if (!kvm_request_pending(vcpu))
3787 * We use MMU_RELOAD just to re-arm the ipte notifier for the
3788 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3789 * This ensures that the ipte instruction for this request has
3790 * already finished. We might race against a second unmapper that
3791 * wants to set the blocking bit. Lets just retry the request loop.
3793 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3795 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3796 kvm_s390_get_prefix(vcpu),
3797 PAGE_SIZE * 2, PROT_WRITE);
3799 kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3805 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3806 vcpu->arch.sie_block->ihcpu = 0xffff;
3810 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3811 if (!ibs_enabled(vcpu)) {
3812 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3813 kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3818 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3819 if (ibs_enabled(vcpu)) {
3820 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3821 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3826 if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3827 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3831 if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3833 * Disable CMM virtualization; we will emulate the ESSA
3834 * instruction manually, in order to provide additional
3835 * functionalities needed for live migration.
3837 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3841 if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3843 * Re-enable CMM virtualization if CMMA is available and
3844 * CMM has been used.
3846 if ((vcpu->kvm->arch.use_cmma) &&
3847 (vcpu->kvm->mm->context.uses_cmm))
3848 vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3852 /* nothing to do, just clear the request */
3853 kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3854 /* we left the vsie handler, nothing to do, just clear the request */
3855 kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3860 void kvm_s390_set_tod_clock(struct kvm *kvm,
3861 const struct kvm_s390_vm_tod_clock *gtod)
3863 struct kvm_vcpu *vcpu;
3864 struct kvm_s390_tod_clock_ext htod;
3867 mutex_lock(&kvm->lock);
3870 get_tod_clock_ext((char *)&htod);
3872 kvm->arch.epoch = gtod->tod - htod.tod;
3874 if (test_kvm_facility(kvm, 139)) {
3875 kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
3876 if (kvm->arch.epoch > gtod->tod)
3877 kvm->arch.epdx -= 1;
3880 kvm_s390_vcpu_block_all(kvm);
3881 kvm_for_each_vcpu(i, vcpu, kvm) {
3882 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3883 vcpu->arch.sie_block->epdx = kvm->arch.epdx;
3886 kvm_s390_vcpu_unblock_all(kvm);
3888 mutex_unlock(&kvm->lock);
3892 * kvm_arch_fault_in_page - fault-in guest page if necessary
3893 * @vcpu: The corresponding virtual cpu
3894 * @gpa: Guest physical address
3895 * @writable: Whether the page should be writable or not
3897 * Make sure that a guest page has been faulted-in on the host.
3899 * Return: Zero on success, negative error code otherwise.
3901 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3903 return gmap_fault(vcpu->arch.gmap, gpa,
3904 writable ? FAULT_FLAG_WRITE : 0);
3907 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3908 unsigned long token)
3910 struct kvm_s390_interrupt inti;
3911 struct kvm_s390_irq irq;
3914 irq.u.ext.ext_params2 = token;
3915 irq.type = KVM_S390_INT_PFAULT_INIT;
3916 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3918 inti.type = KVM_S390_INT_PFAULT_DONE;
3919 inti.parm64 = token;
3920 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3924 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3925 struct kvm_async_pf *work)
3927 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3928 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3931 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3932 struct kvm_async_pf *work)
3934 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3935 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3938 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3939 struct kvm_async_pf *work)
3941 /* s390 will always inject the page directly */
3944 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3947 * s390 will always inject the page directly,
3948 * but we still want check_async_completion to cleanup
3953 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3956 struct kvm_arch_async_pf arch;
3959 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3961 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3962 vcpu->arch.pfault_compare)
3964 if (psw_extint_disabled(vcpu))
3966 if (kvm_s390_vcpu_has_irq(vcpu, 0))
3968 if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3970 if (!vcpu->arch.gmap->pfault_enabled)
3973 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3974 hva += current->thread.gmap_addr & ~PAGE_MASK;
3975 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3978 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3979 return rc;
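/*
 * vcpu_pre_run() - everything that must happen before (re)entering SIE:
 * async-pf completion housekeeping, delivery of pending interrupts,
 * request processing and guest-debug PER setup. A nonzero return value
 * leaves the run loop.
 */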
3982 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3987 * On s390, notifications for arriving pages will be delivered directly
3988 * to the guest, but the housekeeping for completed pfaults is
3989 * handled outside the worker.
3991 kvm_check_async_pf_completion(vcpu);
3993 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3994 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3999 if (test_cpu_flag(CIF_MCCK_PENDING))
4002 if (!kvm_is_ucontrol(vcpu->kvm)) {
4003 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4008 rc = kvm_s390_handle_requests(vcpu);
4012 if (guestdbg_enabled(vcpu)) {
4013 kvm_s390_backup_guest_per_regs(vcpu);
4014 kvm_s390_patch_guest_per_regs(vcpu);
4017 clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4019 vcpu->arch.sie_block->icptcode = 0;
4020 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4021 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4022 trace_kvm_s390_sie_enter(vcpu, cpuflags);
4027 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4029 struct kvm_s390_pgm_info pgm_info = {
4030 .code = PGM_ADDRESSING,
4035 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4036 trace_kvm_s390_sie_fault(vcpu);
4039 * We want to inject an addressing exception, which is defined as a
4040 * suppressing or terminating exception. However, since we came here
4041 * by a DAT access exception, the PSW still points to the faulting
4042 * instruction since DAT exceptions are nullifying. So we've got
4043 * to look up the current opcode to get the length of the instruction
4044 * to be able to forward the PSW.
4046 rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4047 ilen = insn_length(opcode);
4051 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4052 * Forward by arbitrary ilc, injection will take care of
4053 * nullification if necessary.
4055 pgm_info = vcpu->arch.pgm;
4058 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4059 kvm_s390_forward_psw(vcpu, ilen);
4060 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
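/*
 * vcpu_post_run() - turn the raw SIE exit into a verdict: reinject
 * machine checks (-EINTR), let the intercept handlers run, map ucontrol
 * and guest page faults, and encode anything that must reach userspace
 * in vcpu->run->exit_reason.
 */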
4063 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4065 struct mcck_volatile_info *mcck_info;
4066 struct sie_page *sie_page;
4068 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4069 vcpu->arch.sie_block->icptcode);
4070 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4072 if (guestdbg_enabled(vcpu))
4073 kvm_s390_restore_guest_per_regs(vcpu);
4075 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4076 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4078 if (exit_reason == -EINTR) {
4079 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4080 sie_page = container_of(vcpu->arch.sie_block,
4081 struct sie_page, sie_block);
4082 mcck_info = &sie_page->mcck_info;
4083 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4087 if (vcpu->arch.sie_block->icptcode > 0) {
4088 int rc = kvm_handle_sie_intercept(vcpu);
4090 if (rc != -EOPNOTSUPP)
4092 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4093 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4094 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4095 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4097 } else if (exit_reason != -EFAULT) {
4098 vcpu->stat.exit_null++;
4100 } else if (kvm_is_ucontrol(vcpu->kvm)) {
4101 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4102 vcpu->run->s390_ucontrol.trans_exc_code =
4103 current->thread.gmap_addr;
4104 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4106 } else if (current->thread.gmap_pfault) {
4107 trace_kvm_s390_major_guest_pfault(vcpu);
4108 current->thread.gmap_pfault = 0;
4109 if (kvm_arch_setup_async_pf(vcpu))
4111 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4113 return vcpu_post_run_fault_in_sie(vcpu);
4116 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4117 static int __vcpu_run(struct kvm_vcpu *vcpu)
4119 int rc, exit_reason;
4120 struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4123 * We try to hold kvm->srcu during most of vcpu_run (except when run-
4124 * ning the guest), so that memslots (and other stuff) are protected
4126 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4129 rc = vcpu_pre_run(vcpu);
4130 if (rc)
4131 break;
4133 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4135 * As PF_VCPU will be used in the fault handler, there must be
4136 * no uaccess between guest_enter and guest_exit.
4138 local_irq_disable();
4139 guest_enter_irqoff();
4140 __disable_cpu_timer_accounting(vcpu);
4142 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4143 memcpy(sie_page->pv_grregs,
4144 vcpu->run->s.regs.gprs,
4145 sizeof(sie_page->pv_grregs));
4147 exit_reason = sie64a(vcpu->arch.sie_block,
4148 vcpu->run->s.regs.gprs);
4149 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4150 memcpy(vcpu->run->s.regs.gprs,
4151 sie_page->pv_grregs,
4152 sizeof(sie_page->pv_grregs));
4154 * We're not allowed to inject interrupts on intercepts
4155 * that leave the guest state in an "in-between" state
4156 * where the next SIE entry will do a continuation.
4157 * Fence interrupts in our "internal" PSW.
4159 if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4160 vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4161 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4164 local_irq_disable();
4165 __enable_cpu_timer_accounting(vcpu);
4166 guest_exit_irqoff();
4168 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4170 rc = vcpu_post_run(vcpu, exit_reason);
4171 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4173 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4174 return rc;
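/*
 * sync_regs_fmt2() - load the register state that only exists for
 * non-protected ("format 2") guests from kvm_run into the SIE block,
 * including lazy enablement of runtime instrumentation and guarded
 * storage when userspace hands in non-default state (e.g. after
 * migration).
 */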
4177 static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
4179 struct runtime_instr_cb *riccb;
4182 riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4183 gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4184 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4185 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4186 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4187 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4188 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4189 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4191 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4192 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4193 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4194 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4195 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4196 kvm_clear_async_pf_completion_queue(vcpu);
4199 * If userspace sets the riccb (e.g. after migration) to a valid state,
4200 * we should enable RI here instead of doing the lazy enablement.
4202 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4203 test_kvm_facility(vcpu->kvm, 64) &&
4205 !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4206 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4207 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4210 * If userspace sets the gscb (e.g. after migration) to non-zero,
4211 * we should enable GS here instead of doing the lazy enablement.
4213 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4214 test_kvm_facility(vcpu->kvm, 133) &&
4216 !vcpu->arch.gs_enabled) {
4217 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4218 vcpu->arch.sie_block->ecb |= ECB_GS;
4219 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4220 vcpu->arch.gs_enabled = 1;
4222 if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4223 test_kvm_facility(vcpu->kvm, 82)) {
4224 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4225 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4227 if (MACHINE_HAS_GS) {
4228 preempt_disable();
4229 __ctl_set_bit(2, 4);
4230 if (current->thread.gs_cb) {
4231 vcpu->arch.host_gscb = current->thread.gs_cb;
4232 save_gs_cb(vcpu->arch.host_gscb);
4234 if (vcpu->arch.gs_enabled) {
4235 current->thread.gs_cb = (struct gs_cb *)
4236 &vcpu->run->s.regs.gscb;
4237 restore_gs_cb(current->thread.gs_cb);
4238 }
4239 preempt_enable();
4240 }
4241 /* SIE will load etoken directly from SDNX and therefore kvm_run */
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu, kvm_run);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * do only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}
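
/* Mirror of sync_regs_fmt2(): copy format-2 only state back into kvm_run. */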
static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	if (MACHINE_HAS_GS) {
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		preempt_disable();
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		preempt_enable();
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu, kvm_run);
}
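
/*
 * The KVM_RUN backend: sync registers in, run the guest via __vcpu_run()
 * until an exit requires userspace, then store registers back into kvm_run.
 */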
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;

	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}

	kvm_sigset_activate(vcpu);
	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}
	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}
	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);
	kvm_sigset_deactivate(vcpu);
	vcpu->stat.exit_userspace++;
out:
	vcpu_put(vcpu);
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
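
/*
 * IBS is a performance facility that is only worth enabling while exactly
 * one VCPU of the VM is running. The helpers below queue requests so that
 * the flag is always changed on the target VCPU itself.
 */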
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}

int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
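
/*
 * The only vcpu capability handled here is KVM_CAP_S390_CSS_SUPPORT, which
 * flips a VM-wide flag even though it arrives on a vcpu file descriptor.
 */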
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
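
/*
 * Access the secure instruction data area (SIDA) of a protected guest.
 * Plain logical reads/writes are not possible for protected guests, so the
 * SIDA is the only vcpu storage KVM_S390_MEM_OP can access in that case
 * (see the dispatch in kvm_s390_guest_memsida_op() below).
 */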
static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
				   struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	/* reject a sida_offset + size combination that would wrap around */
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
				 mop->sida_offset), mop->size))
			r = -EFAULT;

		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
				   mop->sida_offset), uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}
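
/*
 * Handle KVM_S390_MEM_OP accesses to logical (virtual) guest addresses.
 * Data is staged through a kernel bounce buffer (tmpbuf), since the guest
 * access helpers operate on kernel memory. As an illustrative, non-normative
 * sketch (field names from <linux/kvm.h>; "vcpu_fd", "guest_addr" and
 * "buffer" are placeholders), userspace would read 256 bytes from a guest
 * logical address roughly like this:
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(unsigned long)buffer,
 *		.ar    = 0,
 *	};
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */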
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r = 0;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	}

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
				      struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_guest_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_guest_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}
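
/*
 * Interrupt injection ioctls are handled on the "async" path, i.e. without
 * taking the vcpu mutex, so userspace can inject an interrupt into a vcpu
 * that is currently running.
 */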
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
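
/*
 * All remaining vcpu ioctls are synchronous: the generic kvm_vcpu_ioctl()
 * dispatcher has already taken the vcpu mutex, and vcpu_load() below makes
 * the vcpu state resident on this CPU.
 */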
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
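
/*
 * For user-controlled ("ucontrol") VMs, userspace may mmap() the SIE
 * control block at KVM_S390_SIE_PAGE_OFFSET; any other vcpu mmap access
 * raises SIGBUS.
 */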
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. Memory slots have to start and end on a
	   segment (1MB) boundary. The backing memory in userspace may be
	   fragmented into various different vmas, and it is fine to mmap()
	   and munmap() within this slot at any time after this call. */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;
	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
				      mem->guest_phys_addr, mem->memory_size);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
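
/*
 * Derive which facility bits may be forwarded to guests: each 2-bit field
 * of the SCLP hmfai value selects how much of the corresponding 64-bit
 * facility-list word stays visible; the result is ANDed into
 * kvm_s390_fac_base in kvm_s390_init() below.
 */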
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}

	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c,
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");