1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * hosting IBM Z kernel virtual machines (s390x)
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
11  *               Jason J. Herne <jjherne@us.ibm.com>
12  */
13
14 #define KMSG_COMPONENT "kvm-s390"
15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 #include <linux/pgtable.h>
35
36 #include <asm/asm-offsets.h>
37 #include <asm/lowcore.h>
38 #include <asm/stp.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include <asm/ap.h>
47 #include <asm/uv.h>
48 #include <asm/fpu/api.h>
49 #include "kvm-s390.h"
50 #include "gaccess.h"
51
52 #define CREATE_TRACE_POINTS
53 #include "trace.h"
54 #include "trace-s390.h"
55
56 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
57 #define LOCAL_IRQS 32
58 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
59                            (KVM_MAX_VCPUS + LOCAL_IRQS))
60
61 const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
62         KVM_GENERIC_VM_STATS(),
63         STATS_DESC_COUNTER(VM, inject_io),
64         STATS_DESC_COUNTER(VM, inject_float_mchk),
65         STATS_DESC_COUNTER(VM, inject_pfault_done),
66         STATS_DESC_COUNTER(VM, inject_service_signal),
67         STATS_DESC_COUNTER(VM, inject_virtio)
68 };
69
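/*
 * Layout note (descriptive, not from the original file): the stats header
 * describes the binary stats blob exposed through the KVM stats file
 * descriptor: the header itself comes first, the id string lives at
 * id_offset, the descriptor array at desc_offset, and the counter values
 * at data_offset. The offsets below simply pack these blocks back to back.
 */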
70 const struct kvm_stats_header kvm_vm_stats_header = {
71         .name_size = KVM_STATS_NAME_SIZE,
72         .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
73         .id_offset = sizeof(struct kvm_stats_header),
74         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
75         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
76                        sizeof(kvm_vm_stats_desc),
77 };
78
79 const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
80         KVM_GENERIC_VCPU_STATS(),
81         STATS_DESC_COUNTER(VCPU, exit_userspace),
82         STATS_DESC_COUNTER(VCPU, exit_null),
83         STATS_DESC_COUNTER(VCPU, exit_external_request),
84         STATS_DESC_COUNTER(VCPU, exit_io_request),
85         STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
86         STATS_DESC_COUNTER(VCPU, exit_stop_request),
87         STATS_DESC_COUNTER(VCPU, exit_validity),
88         STATS_DESC_COUNTER(VCPU, exit_instruction),
89         STATS_DESC_COUNTER(VCPU, exit_pei),
90         STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
91         STATS_DESC_COUNTER(VCPU, instruction_lctl),
92         STATS_DESC_COUNTER(VCPU, instruction_lctlg),
93         STATS_DESC_COUNTER(VCPU, instruction_stctl),
94         STATS_DESC_COUNTER(VCPU, instruction_stctg),
95         STATS_DESC_COUNTER(VCPU, exit_program_interruption),
96         STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
97         STATS_DESC_COUNTER(VCPU, exit_operation_exception),
98         STATS_DESC_COUNTER(VCPU, deliver_ckc),
99         STATS_DESC_COUNTER(VCPU, deliver_cputm),
100         STATS_DESC_COUNTER(VCPU, deliver_external_call),
101         STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
102         STATS_DESC_COUNTER(VCPU, deliver_service_signal),
103         STATS_DESC_COUNTER(VCPU, deliver_virtio),
104         STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
105         STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
106         STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
107         STATS_DESC_COUNTER(VCPU, deliver_program),
108         STATS_DESC_COUNTER(VCPU, deliver_io),
109         STATS_DESC_COUNTER(VCPU, deliver_machine_check),
110         STATS_DESC_COUNTER(VCPU, exit_wait_state),
111         STATS_DESC_COUNTER(VCPU, inject_ckc),
112         STATS_DESC_COUNTER(VCPU, inject_cputm),
113         STATS_DESC_COUNTER(VCPU, inject_external_call),
114         STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
115         STATS_DESC_COUNTER(VCPU, inject_mchk),
116         STATS_DESC_COUNTER(VCPU, inject_pfault_init),
117         STATS_DESC_COUNTER(VCPU, inject_program),
118         STATS_DESC_COUNTER(VCPU, inject_restart),
119         STATS_DESC_COUNTER(VCPU, inject_set_prefix),
120         STATS_DESC_COUNTER(VCPU, inject_stop_signal),
121         STATS_DESC_COUNTER(VCPU, instruction_epsw),
122         STATS_DESC_COUNTER(VCPU, instruction_gs),
123         STATS_DESC_COUNTER(VCPU, instruction_io_other),
124         STATS_DESC_COUNTER(VCPU, instruction_lpsw),
125         STATS_DESC_COUNTER(VCPU, instruction_lpswe),
126         STATS_DESC_COUNTER(VCPU, instruction_pfmf),
127         STATS_DESC_COUNTER(VCPU, instruction_ptff),
128         STATS_DESC_COUNTER(VCPU, instruction_sck),
129         STATS_DESC_COUNTER(VCPU, instruction_sckpf),
130         STATS_DESC_COUNTER(VCPU, instruction_stidp),
131         STATS_DESC_COUNTER(VCPU, instruction_spx),
132         STATS_DESC_COUNTER(VCPU, instruction_stpx),
133         STATS_DESC_COUNTER(VCPU, instruction_stap),
134         STATS_DESC_COUNTER(VCPU, instruction_iske),
135         STATS_DESC_COUNTER(VCPU, instruction_ri),
136         STATS_DESC_COUNTER(VCPU, instruction_rrbe),
137         STATS_DESC_COUNTER(VCPU, instruction_sske),
138         STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
139         STATS_DESC_COUNTER(VCPU, instruction_stsi),
140         STATS_DESC_COUNTER(VCPU, instruction_stfl),
141         STATS_DESC_COUNTER(VCPU, instruction_tb),
142         STATS_DESC_COUNTER(VCPU, instruction_tpi),
143         STATS_DESC_COUNTER(VCPU, instruction_tprot),
144         STATS_DESC_COUNTER(VCPU, instruction_tsch),
145         STATS_DESC_COUNTER(VCPU, instruction_sie),
146         STATS_DESC_COUNTER(VCPU, instruction_essa),
147         STATS_DESC_COUNTER(VCPU, instruction_sthyi),
148         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
149         STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
150         STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
151         STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
152         STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
153         STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
154         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
155         STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
156         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
157         STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
158         STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
159         STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
160         STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
161         STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
162         STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
163         STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
164         STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
165         STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
166         STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
167         STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
168         STATS_DESC_COUNTER(VCPU, diag_9c_forward),
169         STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
170         STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
171         STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
172         STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
173         STATS_DESC_COUNTER(VCPU, pfault_sync)
174 };
175
176 const struct kvm_stats_header kvm_vcpu_stats_header = {
177         .name_size = KVM_STATS_NAME_SIZE,
178         .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
179         .id_offset = sizeof(struct kvm_stats_header),
180         .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
181         .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
182                        sizeof(kvm_vcpu_stats_desc),
183 };
184
185 /* allow nested virtualization in KVM (if enabled by user space) */
186 static int nested;
187 module_param(nested, int, S_IRUGO);
188 MODULE_PARM_DESC(nested, "Nested virtualization support");
189
190 /* allow 1m huge page guest backing, if !nested */
191 static int hpage;
192 module_param(hpage, int, 0444);
193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
194
195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
196 static u8 halt_poll_max_steal = 10;
197 module_param(halt_poll_max_steal, byte, 0644);
198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
199
200 /* if set to true, the GISA will be initialized and used if available */
201 static bool use_gisa  = true;
202 module_param(use_gisa, bool, 0644);
203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
204
205 /* maximum diag9c forwarding per second */
206 unsigned int diag9c_forwarding_hz;
207 module_param(diag9c_forwarding_hz, uint, 0644);
208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
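/*
 * Illustrative usage (not part of the original file): these are ordinary
 * module parameters, so nested SIE support could for example be enabled at
 * load time with "modprobe kvm nested=1" (or kvm.nested=1 on the kernel
 * command line when kvm is built in), while the 0644 parameters can also
 * be changed at runtime through /sys/module/kvm/parameters/.
 */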
209
210 /*
211  * For now we handle at most 16 double words as this is what the s390 base
212  * kernel handles and stores in the prefix page. If we ever need to go beyond
213  * this, the code needs to change, but the external uapi can stay.
214  */
215 #define SIZE_INTERNAL 16
216
217 /*
218  * Base feature mask that defines default mask for facilities. Consists of the
219  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
220  */
221 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
222 /*
223  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
224  * and defines the facilities that can be enabled via a cpu model.
225  */
226 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
227
228 static unsigned long kvm_s390_fac_size(void)
229 {
230         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
231         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
232         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
233                 sizeof(stfle_fac_list));
234
235         return SIZE_INTERNAL;
236 }
237
238 /* available cpu features supported by kvm */
239 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
240 /* available subfunctions indicated via query / "test bit" */
241 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
242
243 static struct gmap_notifier gmap_notifier;
244 static struct gmap_notifier vsie_gmap_notifier;
245 debug_info_t *kvm_s390_dbf;
246 debug_info_t *kvm_s390_dbf_uv;
247
248 /* Section: not file related */
249 int kvm_arch_hardware_enable(void)
250 {
251         /* every s390 is virtualization enabled ;-) */
252         return 0;
253 }
254
255 int kvm_arch_check_processor_compat(void *opaque)
256 {
257         return 0;
258 }
259
260 /* forward declarations */
261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
262                               unsigned long end);
263 static int sca_switch_to_extended(struct kvm *kvm);
264
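/*
 * Descriptive note: kvm_clock_sync_scb() below treats (epdx, epoch) as one
 * wide signed offset. The 64-bit epoch is adjusted by -delta and, when the
 * multiple-epoch facility is active (ECD_MEF), a borrow/carry out of the
 * low 64 bits is propagated into the epoch index.
 */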
265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
266 {
267         u8 delta_idx = 0;
268
269         /*
270          * The TOD jumps by delta; we have to compensate for this by adding
271          * -delta to the epoch.
272          */
273         delta = -delta;
274
275         /* sign-extension - we're adding to signed values below */
276         if ((s64)delta < 0)
277                 delta_idx = -1;
278
279         scb->epoch += delta;
280         if (scb->ecd & ECD_MEF) {
281                 scb->epdx += delta_idx;
282                 if (scb->epoch < delta)
283                         scb->epdx += 1;
284         }
285 }
286
287 /*
288  * This callback is executed during stop_machine(). All CPUs are therefore
289  * temporarily stopped. In order not to change guest behavior, we have to
290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
291  * so a CPU won't be stopped while calculating with the epoch.
292  */
293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
294                           void *v)
295 {
296         struct kvm *kvm;
297         struct kvm_vcpu *vcpu;
298         unsigned long i;
299         unsigned long long *delta = v;
300
301         list_for_each_entry(kvm, &vm_list, vm_list) {
302                 kvm_for_each_vcpu(i, vcpu, kvm) {
303                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
304                         if (i == 0) {
305                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
306                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
307                         }
308                         if (vcpu->arch.cputm_enabled)
309                                 vcpu->arch.cputm_start += *delta;
310                         if (vcpu->arch.vsie_block)
311                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
312                                                    *delta);
313                 }
314         }
315         return NOTIFY_OK;
316 }
317
318 static struct notifier_block kvm_clock_notifier = {
319         .notifier_call = kvm_clock_sync,
320 };
321
322 int kvm_arch_hardware_setup(void *opaque)
323 {
324         gmap_notifier.notifier_call = kvm_gmap_notifier;
325         gmap_register_pte_notifier(&gmap_notifier);
326         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
327         gmap_register_pte_notifier(&vsie_gmap_notifier);
328         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
329                                        &kvm_clock_notifier);
330         return 0;
331 }
332
333 void kvm_arch_hardware_unsetup(void)
334 {
335         gmap_unregister_pte_notifier(&gmap_notifier);
336         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
337         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
338                                          &kvm_clock_notifier);
339 }
340
341 static void allow_cpu_feat(unsigned long nr)
342 {
343         set_bit_inv(nr, kvm_s390_available_cpu_feat);
344 }
345
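/*
 * Descriptive note: PLO with bit 0x100 set in the function code performs a
 * "test bit" query instead of the operation itself; condition code 0 then
 * means the corresponding PLO function is installed on this machine.
 */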
346 static inline int plo_test_bit(unsigned char nr)
347 {
348         unsigned long function = (unsigned long)nr | 0x100;
349         int cc;
350
351         asm volatile(
352                 "       lgr     0,%[function]\n"
353                 /* Parameter registers are ignored for "test bit" */
354                 "       plo     0,0,0,0(0)\n"
355                 "       ipm     %0\n"
356                 "       srl     %0,28\n"
357                 : "=d" (cc)
358                 : [function] "d" (function)
359                 : "cc", "0");
360         return cc == 0;
361 }
362
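/*
 * Descriptive note: __insn32_query() runs the given instruction in its
 * query form: GR0 = 0 selects the query function and GR1 points to the
 * buffer that receives the installed-subfunctions bitmap (used below for
 * SORTL and DFLTCC).
 */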
363 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
364 {
365         asm volatile(
366                 "       lghi    0,0\n"
367                 "       lgr     1,%[query]\n"
368                 /* Parameter registers are ignored */
369                 "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
370                 :
371                 : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
372                 : "cc", "memory", "0", "1");
373 }
374
375 #define INSN_SORTL 0xb938
376 #define INSN_DFLTCC 0xb939
377
378 static void kvm_s390_cpu_feat_init(void)
379 {
380         int i;
381
382         for (i = 0; i < 256; ++i) {
383                 if (plo_test_bit(i))
384                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
385         }
386
387         if (test_facility(28)) /* TOD-clock steering */
388                 ptff(kvm_s390_available_subfunc.ptff,
389                      sizeof(kvm_s390_available_subfunc.ptff),
390                      PTFF_QAF);
391
392         if (test_facility(17)) { /* MSA */
393                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
394                               kvm_s390_available_subfunc.kmac);
395                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
396                               kvm_s390_available_subfunc.kmc);
397                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
398                               kvm_s390_available_subfunc.km);
399                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
400                               kvm_s390_available_subfunc.kimd);
401                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
402                               kvm_s390_available_subfunc.klmd);
403         }
404         if (test_facility(76)) /* MSA3 */
405                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
406                               kvm_s390_available_subfunc.pckmo);
407         if (test_facility(77)) { /* MSA4 */
408                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
409                               kvm_s390_available_subfunc.kmctr);
410                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
411                               kvm_s390_available_subfunc.kmf);
412                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
413                               kvm_s390_available_subfunc.kmo);
414                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
415                               kvm_s390_available_subfunc.pcc);
416         }
417         if (test_facility(57)) /* MSA5 */
418                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
419                               kvm_s390_available_subfunc.ppno);
420
421         if (test_facility(146)) /* MSA8 */
422                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
423                               kvm_s390_available_subfunc.kma);
424
425         if (test_facility(155)) /* MSA9 */
426                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
427                               kvm_s390_available_subfunc.kdsa);
428
429         if (test_facility(150)) /* SORTL */
430                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
431
432         if (test_facility(151)) /* DFLTCC */
433                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
434
435         if (MACHINE_HAS_ESOP)
436                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
437         /*
438          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
439          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
440          */
441         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
442             !test_facility(3) || !nested)
443                 return;
444         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
445         if (sclp.has_64bscao)
446                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
447         if (sclp.has_siif)
448                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
449         if (sclp.has_gpere)
450                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
451         if (sclp.has_gsls)
452                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
453         if (sclp.has_ib)
454                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
455         if (sclp.has_cei)
456                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
457         if (sclp.has_ibs)
458                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
459         if (sclp.has_kss)
460                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
461         /*
462          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
463          * all skey handling functions read/set the skey from the PGSTE
464          * instead of the real storage key.
465          *
466          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
467          * pages to be detected as preserved although they are resident.
468          *
469          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
470          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
471          *
472          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
473          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
474          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
475          *
476          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
477          * cannot easily shadow the SCA because of the ipte lock.
478          */
479 }
480
481 int kvm_arch_init(void *opaque)
482 {
483         int rc = -ENOMEM;
484
485         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
486         if (!kvm_s390_dbf)
487                 return -ENOMEM;
488
489         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
490         if (!kvm_s390_dbf_uv)
491                 goto out;
492
493         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
494             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
495                 goto out;
496
497         kvm_s390_cpu_feat_init();
498
499         /* Register floating interrupt controller interface. */
500         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
501         if (rc) {
502                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
503                 goto out;
504         }
505
506         rc = kvm_s390_gib_init(GAL_ISC);
507         if (rc)
508                 goto out;
509
510         return 0;
511
512 out:
513         kvm_arch_exit();
514         return rc;
515 }
516
517 void kvm_arch_exit(void)
518 {
519         kvm_s390_gib_destroy();
520         debug_unregister(kvm_s390_dbf);
521         debug_unregister(kvm_s390_dbf_uv);
522 }
523
524 /* Section: device related */
525 long kvm_arch_dev_ioctl(struct file *filp,
526                         unsigned int ioctl, unsigned long arg)
527 {
528         if (ioctl == KVM_S390_ENABLE_SIE)
529                 return s390_enable_sie();
530         return -EINVAL;
531 }
532
533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
534 {
535         int r;
536
537         switch (ext) {
538         case KVM_CAP_S390_PSW:
539         case KVM_CAP_S390_GMAP:
540         case KVM_CAP_SYNC_MMU:
541 #ifdef CONFIG_KVM_S390_UCONTROL
542         case KVM_CAP_S390_UCONTROL:
543 #endif
544         case KVM_CAP_ASYNC_PF:
545         case KVM_CAP_SYNC_REGS:
546         case KVM_CAP_ONE_REG:
547         case KVM_CAP_ENABLE_CAP:
548         case KVM_CAP_S390_CSS_SUPPORT:
549         case KVM_CAP_IOEVENTFD:
550         case KVM_CAP_DEVICE_CTRL:
551         case KVM_CAP_S390_IRQCHIP:
552         case KVM_CAP_VM_ATTRIBUTES:
553         case KVM_CAP_MP_STATE:
554         case KVM_CAP_IMMEDIATE_EXIT:
555         case KVM_CAP_S390_INJECT_IRQ:
556         case KVM_CAP_S390_USER_SIGP:
557         case KVM_CAP_S390_USER_STSI:
558         case KVM_CAP_S390_SKEYS:
559         case KVM_CAP_S390_IRQ_STATE:
560         case KVM_CAP_S390_USER_INSTR0:
561         case KVM_CAP_S390_CMMA_MIGRATION:
562         case KVM_CAP_S390_AIS:
563         case KVM_CAP_S390_AIS_MIGRATION:
564         case KVM_CAP_S390_VCPU_RESETS:
565         case KVM_CAP_SET_GUEST_DEBUG:
566         case KVM_CAP_S390_DIAG318:
567                 r = 1;
568                 break;
569         case KVM_CAP_SET_GUEST_DEBUG2:
570                 r = KVM_GUESTDBG_VALID_MASK;
571                 break;
572         case KVM_CAP_S390_HPAGE_1M:
573                 r = 0;
574                 if (hpage && !kvm_is_ucontrol(kvm))
575                         r = 1;
576                 break;
577         case KVM_CAP_S390_MEM_OP:
578                 r = MEM_OP_MAX_SIZE;
579                 break;
580         case KVM_CAP_NR_VCPUS:
581         case KVM_CAP_MAX_VCPUS:
582         case KVM_CAP_MAX_VCPU_ID:
583                 r = KVM_S390_BSCA_CPU_SLOTS;
584                 if (!kvm_s390_use_sca_entries())
585                         r = KVM_MAX_VCPUS;
586                 else if (sclp.has_esca && sclp.has_64bscao)
587                         r = KVM_S390_ESCA_CPU_SLOTS;
588                 if (ext == KVM_CAP_NR_VCPUS)
589                         r = min_t(unsigned int, num_online_cpus(), r);
590                 break;
591         case KVM_CAP_S390_COW:
592                 r = MACHINE_HAS_ESOP;
593                 break;
594         case KVM_CAP_S390_VECTOR_REGISTERS:
595                 r = MACHINE_HAS_VX;
596                 break;
597         case KVM_CAP_S390_RI:
598                 r = test_facility(64);
599                 break;
600         case KVM_CAP_S390_GS:
601                 r = test_facility(133);
602                 break;
603         case KVM_CAP_S390_BPB:
604                 r = test_facility(82);
605                 break;
606         case KVM_CAP_S390_PROTECTED:
607                 r = is_prot_virt_host();
608                 break;
609         default:
610                 r = 0;
611         }
612         return r;
613 }
614
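/*
 * Descriptive note: transfer the dirty bits that the gmap collected at
 * segment (pmd) granularity into the generic KVM dirty bitmap, walking the
 * memslot one segment (_PAGE_ENTRIES 4k pages) at a time.
 */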
615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
616 {
617         int i;
618         gfn_t cur_gfn, last_gfn;
619         unsigned long gaddr, vmaddr;
620         struct gmap *gmap = kvm->arch.gmap;
621         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
622
623         /* Loop over all guest segments */
624         cur_gfn = memslot->base_gfn;
625         last_gfn = memslot->base_gfn + memslot->npages;
626         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
627                 gaddr = gfn_to_gpa(cur_gfn);
628                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
629                 if (kvm_is_error_hva(vmaddr))
630                         continue;
631
632                 bitmap_zero(bitmap, _PAGE_ENTRIES);
633                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
634                 for (i = 0; i < _PAGE_ENTRIES; i++) {
635                         if (test_bit(i, bitmap))
636                                 mark_page_dirty(kvm, cur_gfn + i);
637                 }
638
639                 if (fatal_signal_pending(current))
640                         return;
641                 cond_resched();
642         }
643 }
644
645 /* Section: vm related */
646 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
647
648 /*
649  * Get (and clear) the dirty memory log for a memory slot.
650  */
651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
652                                struct kvm_dirty_log *log)
653 {
654         int r;
655         unsigned long n;
656         struct kvm_memory_slot *memslot;
657         int is_dirty;
658
659         if (kvm_is_ucontrol(kvm))
660                 return -EINVAL;
661
662         mutex_lock(&kvm->slots_lock);
663
664         r = -EINVAL;
665         if (log->slot >= KVM_USER_MEM_SLOTS)
666                 goto out;
667
668         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
669         if (r)
670                 goto out;
671
672         /* Clear the dirty log */
673         if (is_dirty) {
674                 n = kvm_dirty_bitmap_bytes(memslot);
675                 memset(memslot->dirty_bitmap, 0, n);
676         }
677         r = 0;
678 out:
679         mutex_unlock(&kvm->slots_lock);
680         return r;
681 }
682
683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
684 {
685         unsigned long i;
686         struct kvm_vcpu *vcpu;
687
688         kvm_for_each_vcpu(i, vcpu, kvm) {
689                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
690         }
691 }
692
693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
694 {
695         int r;
696
697         if (cap->flags)
698                 return -EINVAL;
699
700         switch (cap->cap) {
701         case KVM_CAP_S390_IRQCHIP:
702                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
703                 kvm->arch.use_irqchip = 1;
704                 r = 0;
705                 break;
706         case KVM_CAP_S390_USER_SIGP:
707                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
708                 kvm->arch.user_sigp = 1;
709                 r = 0;
710                 break;
711         case KVM_CAP_S390_VECTOR_REGISTERS:
712                 mutex_lock(&kvm->lock);
713                 if (kvm->created_vcpus) {
714                         r = -EBUSY;
715                 } else if (MACHINE_HAS_VX) {
716                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
717                         set_kvm_facility(kvm->arch.model.fac_list, 129);
718                         if (test_facility(134)) {
719                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
720                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
721                         }
722                         if (test_facility(135)) {
723                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
724                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
725                         }
726                         if (test_facility(148)) {
727                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
728                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
729                         }
730                         if (test_facility(152)) {
731                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
732                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
733                         }
734                         if (test_facility(192)) {
735                                 set_kvm_facility(kvm->arch.model.fac_mask, 192);
736                                 set_kvm_facility(kvm->arch.model.fac_list, 192);
737                         }
738                         r = 0;
739                 } else
740                         r = -EINVAL;
741                 mutex_unlock(&kvm->lock);
742                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
743                          r ? "(not available)" : "(success)");
744                 break;
745         case KVM_CAP_S390_RI:
746                 r = -EINVAL;
747                 mutex_lock(&kvm->lock);
748                 if (kvm->created_vcpus) {
749                         r = -EBUSY;
750                 } else if (test_facility(64)) {
751                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
752                         set_kvm_facility(kvm->arch.model.fac_list, 64);
753                         r = 0;
754                 }
755                 mutex_unlock(&kvm->lock);
756                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
757                          r ? "(not available)" : "(success)");
758                 break;
759         case KVM_CAP_S390_AIS:
760                 mutex_lock(&kvm->lock);
761                 if (kvm->created_vcpus) {
762                         r = -EBUSY;
763                 } else {
764                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
765                         set_kvm_facility(kvm->arch.model.fac_list, 72);
766                         r = 0;
767                 }
768                 mutex_unlock(&kvm->lock);
769                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
770                          r ? "(not available)" : "(success)");
771                 break;
772         case KVM_CAP_S390_GS:
773                 r = -EINVAL;
774                 mutex_lock(&kvm->lock);
775                 if (kvm->created_vcpus) {
776                         r = -EBUSY;
777                 } else if (test_facility(133)) {
778                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
779                         set_kvm_facility(kvm->arch.model.fac_list, 133);
780                         r = 0;
781                 }
782                 mutex_unlock(&kvm->lock);
783                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
784                          r ? "(not available)" : "(success)");
785                 break;
786         case KVM_CAP_S390_HPAGE_1M:
787                 mutex_lock(&kvm->lock);
788                 if (kvm->created_vcpus)
789                         r = -EBUSY;
790                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
791                         r = -EINVAL;
792                 else {
793                         r = 0;
794                         mmap_write_lock(kvm->mm);
795                         kvm->mm->context.allow_gmap_hpage_1m = 1;
796                         mmap_write_unlock(kvm->mm);
797                         /*
798                          * We might have to create fake 4k page
799                          * tables. To keep the hardware from working on
800                          * stale PGSTEs, we emulate these instructions.
801                          */
802                         kvm->arch.use_skf = 0;
803                         kvm->arch.use_pfmfi = 0;
804                 }
805                 mutex_unlock(&kvm->lock);
806                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
807                          r ? "(not available)" : "(success)");
808                 break;
809         case KVM_CAP_S390_USER_STSI:
810                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
811                 kvm->arch.user_stsi = 1;
812                 r = 0;
813                 break;
814         case KVM_CAP_S390_USER_INSTR0:
815                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
816                 kvm->arch.user_instr0 = 1;
817                 icpt_operexc_on_all_vcpus(kvm);
818                 r = 0;
819                 break;
820         default:
821                 r = -EINVAL;
822                 break;
823         }
824         return r;
825 }
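/*
 * Illustrative userspace usage (not part of the original file): the
 * capabilities handled above are enabled per VM through the generic
 * KVM_ENABLE_CAP ioctl on the VM file descriptor, e.g.
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 */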
826
827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
828 {
829         int ret;
830
831         switch (attr->attr) {
832         case KVM_S390_VM_MEM_LIMIT_SIZE:
833                 ret = 0;
834                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
835                          kvm->arch.mem_limit);
836                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
837                         ret = -EFAULT;
838                 break;
839         default:
840                 ret = -ENXIO;
841                 break;
842         }
843         return ret;
844 }
845
846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
847 {
848         int ret;
849         unsigned int idx;
850         switch (attr->attr) {
851         case KVM_S390_VM_MEM_ENABLE_CMMA:
852                 ret = -ENXIO;
853                 if (!sclp.has_cmma)
854                         break;
855
856                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
857                 mutex_lock(&kvm->lock);
858                 if (kvm->created_vcpus)
859                         ret = -EBUSY;
860                 else if (kvm->mm->context.allow_gmap_hpage_1m)
861                         ret = -EINVAL;
862                 else {
863                         kvm->arch.use_cmma = 1;
864                         /* Not compatible with cmma. */
865                         kvm->arch.use_pfmfi = 0;
866                         ret = 0;
867                 }
868                 mutex_unlock(&kvm->lock);
869                 break;
870         case KVM_S390_VM_MEM_CLR_CMMA:
871                 ret = -ENXIO;
872                 if (!sclp.has_cmma)
873                         break;
874                 ret = -EINVAL;
875                 if (!kvm->arch.use_cmma)
876                         break;
877
878                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
879                 mutex_lock(&kvm->lock);
880                 idx = srcu_read_lock(&kvm->srcu);
881                 s390_reset_cmma(kvm->arch.gmap->mm);
882                 srcu_read_unlock(&kvm->srcu, idx);
883                 mutex_unlock(&kvm->lock);
884                 ret = 0;
885                 break;
886         case KVM_S390_VM_MEM_LIMIT_SIZE: {
887                 unsigned long new_limit;
888
889                 if (kvm_is_ucontrol(kvm))
890                         return -EINVAL;
891
892                 if (get_user(new_limit, (u64 __user *)attr->addr))
893                         return -EFAULT;
894
895                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
896                     new_limit > kvm->arch.mem_limit)
897                         return -E2BIG;
898
899                 if (!new_limit)
900                         return -EINVAL;
901
902                 /* gmap_create takes last usable address */
903                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
904                         new_limit -= 1;
905
906                 ret = -EBUSY;
907                 mutex_lock(&kvm->lock);
908                 if (!kvm->created_vcpus) {
909                         /* gmap_create will round the limit up */
910                         struct gmap *new = gmap_create(current->mm, new_limit);
911
912                         if (!new) {
913                                 ret = -ENOMEM;
914                         } else {
915                                 gmap_remove(kvm->arch.gmap);
916                                 new->private = kvm;
917                                 kvm->arch.gmap = new;
918                                 ret = 0;
919                         }
920                 }
921                 mutex_unlock(&kvm->lock);
922                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
923                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
924                          (void *) kvm->arch.gmap->asce);
925                 break;
926         }
927         default:
928                 ret = -ENXIO;
929                 break;
930         }
931         return ret;
932 }
933
934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
935
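/*
 * Descriptive note: block all vcpus, reprogram their crypto setup from the
 * VM-wide state and request a VSIE restart so that shadow crycbs are
 * rebuilt with the new settings.
 */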
936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
937 {
938         struct kvm_vcpu *vcpu;
939         unsigned long i;
940
941         kvm_s390_vcpu_block_all(kvm);
942
943         kvm_for_each_vcpu(i, vcpu, kvm) {
944                 kvm_s390_vcpu_crypto_setup(vcpu);
945                 /* recreate the shadow crycb by leaving the VSIE handler */
946                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
947         }
948
949         kvm_s390_vcpu_unblock_all(kvm);
950 }
951
952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
953 {
954         mutex_lock(&kvm->lock);
955         switch (attr->attr) {
956         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
957                 if (!test_kvm_facility(kvm, 76)) {
958                         mutex_unlock(&kvm->lock);
959                         return -EINVAL;
960                 }
961                 get_random_bytes(
962                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
963                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
964                 kvm->arch.crypto.aes_kw = 1;
965                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
966                 break;
967         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
968                 if (!test_kvm_facility(kvm, 76)) {
969                         mutex_unlock(&kvm->lock);
970                         return -EINVAL;
971                 }
972                 get_random_bytes(
973                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
974                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
975                 kvm->arch.crypto.dea_kw = 1;
976                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
977                 break;
978         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
979                 if (!test_kvm_facility(kvm, 76)) {
980                         mutex_unlock(&kvm->lock);
981                         return -EINVAL;
982                 }
983                 kvm->arch.crypto.aes_kw = 0;
984                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
985                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
986                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
987                 break;
988         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
989                 if (!test_kvm_facility(kvm, 76)) {
990                         mutex_unlock(&kvm->lock);
991                         return -EINVAL;
992                 }
993                 kvm->arch.crypto.dea_kw = 0;
994                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
995                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
996                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
997                 break;
998         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
999                 if (!ap_instructions_available()) {
1000                         mutex_unlock(&kvm->lock);
1001                         return -EOPNOTSUPP;
1002                 }
1003                 kvm->arch.crypto.apie = 1;
1004                 break;
1005         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006                 if (!ap_instructions_available()) {
1007                         mutex_unlock(&kvm->lock);
1008                         return -EOPNOTSUPP;
1009                 }
1010                 kvm->arch.crypto.apie = 0;
1011                 break;
1012         default:
1013                 mutex_unlock(&kvm->lock);
1014                 return -ENXIO;
1015         }
1016
1017         kvm_s390_vcpu_crypto_reset_all(kvm);
1018         mutex_unlock(&kvm->lock);
1019         return 0;
1020 }
1021
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024         unsigned long cx;
1025         struct kvm_vcpu *vcpu;
1026
1027         kvm_for_each_vcpu(cx, vcpu, kvm)
1028                 kvm_s390_sync_request(req, vcpu);
1029 }
1030
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037         struct kvm_memory_slot *ms;
1038         struct kvm_memslots *slots;
1039         unsigned long ram_pages = 0;
1040         int bkt;
1041
1042         /* migration mode already enabled */
1043         if (kvm->arch.migration_mode)
1044                 return 0;
1045         slots = kvm_memslots(kvm);
1046         if (!slots || kvm_memslots_empty(slots))
1047                 return -EINVAL;
1048
1049         if (!kvm->arch.use_cmma) {
1050                 kvm->arch.migration_mode = 1;
1051                 return 0;
1052         }
1053         /* mark all the pages in active slots as dirty */
1054         kvm_for_each_memslot(ms, bkt, slots) {
1055                 if (!ms->dirty_bitmap)
1056                         return -EINVAL;
1057                 /*
1058                  * The second half of the bitmap is only used on x86,
1059                  * and would be wasted otherwise, so we put it to good
1060                  * use here to keep track of the state of the storage
1061                  * attributes.
1062                  */
1063                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064                 ram_pages += ms->npages;
1065         }
1066         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067         kvm->arch.migration_mode = 1;
1068         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1069         return 0;
1070 }
1071
1072 /*
1073  * Must be called with kvm->slots_lock to avoid races with ourselves and
1074  * kvm_s390_vm_start_migration.
1075  */
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1077 {
1078         /* migration mode already disabled */
1079         if (!kvm->arch.migration_mode)
1080                 return 0;
1081         kvm->arch.migration_mode = 0;
1082         if (kvm->arch.use_cmma)
1083                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1084         return 0;
1085 }
1086
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088                                      struct kvm_device_attr *attr)
1089 {
1090         int res = -ENXIO;
1091
1092         mutex_lock(&kvm->slots_lock);
1093         switch (attr->attr) {
1094         case KVM_S390_VM_MIGRATION_START:
1095                 res = kvm_s390_vm_start_migration(kvm);
1096                 break;
1097         case KVM_S390_VM_MIGRATION_STOP:
1098                 res = kvm_s390_vm_stop_migration(kvm);
1099                 break;
1100         default:
1101                 break;
1102         }
1103         mutex_unlock(&kvm->slots_lock);
1104
1105         return res;
1106 }
1107
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109                                      struct kvm_device_attr *attr)
1110 {
1111         u64 mig = kvm->arch.migration_mode;
1112
1113         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1114                 return -ENXIO;
1115
1116         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1117                 return -EFAULT;
1118         return 0;
1119 }
1120
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123         struct kvm_s390_vm_tod_clock gtod;
1124
1125         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1126                 return -EFAULT;
1127
1128         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1129                 return -EINVAL;
1130         kvm_s390_set_tod_clock(kvm, &gtod);
1131
1132         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133                 gtod.epoch_idx, gtod.tod);
1134
1135         return 0;
1136 }
1137
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140         u8 gtod_high;
1141
1142         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1143                                            sizeof(gtod_high)))
1144                 return -EFAULT;
1145
1146         if (gtod_high != 0)
1147                 return -EINVAL;
1148         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1149
1150         return 0;
1151 }
1152
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1154 {
1155         struct kvm_s390_vm_tod_clock gtod = { 0 };
1156
1157         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1158                            sizeof(gtod.tod)))
1159                 return -EFAULT;
1160
1161         kvm_s390_set_tod_clock(kvm, &gtod);
1162         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1163         return 0;
1164 }
1165
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168         int ret;
1169
1170         if (attr->flags)
1171                 return -EINVAL;
1172
1173         switch (attr->attr) {
1174         case KVM_S390_VM_TOD_EXT:
1175                 ret = kvm_s390_set_tod_ext(kvm, attr);
1176                 break;
1177         case KVM_S390_VM_TOD_HIGH:
1178                 ret = kvm_s390_set_tod_high(kvm, attr);
1179                 break;
1180         case KVM_S390_VM_TOD_LOW:
1181                 ret = kvm_s390_set_tod_low(kvm, attr);
1182                 break;
1183         default:
1184                 ret = -ENXIO;
1185                 break;
1186         }
1187         return ret;
1188 }
1189
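/*
 * Descriptive note: compose the guest view of the TOD clock as host TOD
 * plus the per-VM epoch, carrying into the epoch index when the addition
 * wraps and the multiple-epoch facility (139) is available to the guest.
 */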
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191                                    struct kvm_s390_vm_tod_clock *gtod)
1192 {
1193         union tod_clock clk;
1194
1195         preempt_disable();
1196
1197         store_tod_clock_ext(&clk);
1198
1199         gtod->tod = clk.tod + kvm->arch.epoch;
1200         gtod->epoch_idx = 0;
1201         if (test_kvm_facility(kvm, 139)) {
1202                 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203                 if (gtod->tod < clk.tod)
1204                         gtod->epoch_idx += 1;
1205         }
1206
1207         preempt_enable();
1208 }
1209
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212         struct kvm_s390_vm_tod_clock gtod;
1213
1214         memset(&gtod, 0, sizeof(gtod));
1215         kvm_s390_get_tod_clock(kvm, &gtod);
1216         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217                 return -EFAULT;
1218
1219         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220                 gtod.epoch_idx, gtod.tod);
1221         return 0;
1222 }
1223
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226         u8 gtod_high = 0;
1227
1228         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1229                                          sizeof(gtod_high)))
1230                 return -EFAULT;
1231         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1232
1233         return 0;
1234 }
1235
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238         u64 gtod;
1239
1240         gtod = kvm_s390_get_tod_clock_fast(kvm);
1241         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1242                 return -EFAULT;
1243         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1244
1245         return 0;
1246 }
1247
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1249 {
1250         int ret;
1251
1252         if (attr->flags)
1253                 return -EINVAL;
1254
1255         switch (attr->attr) {
1256         case KVM_S390_VM_TOD_EXT:
1257                 ret = kvm_s390_get_tod_ext(kvm, attr);
1258                 break;
1259         case KVM_S390_VM_TOD_HIGH:
1260                 ret = kvm_s390_get_tod_high(kvm, attr);
1261                 break;
1262         case KVM_S390_VM_TOD_LOW:
1263                 ret = kvm_s390_get_tod_low(kvm, attr);
1264                 break;
1265         default:
1266                 ret = -ENXIO;
1267                 break;
1268         }
1269         return ret;
1270 }
1271
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1273 {
1274         struct kvm_s390_vm_cpu_processor *proc;
1275         u16 lowest_ibc, unblocked_ibc;
1276         int ret = 0;
1277
1278         mutex_lock(&kvm->lock);
1279         if (kvm->created_vcpus) {
1280                 ret = -EBUSY;
1281                 goto out;
1282         }
1283         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1284         if (!proc) {
1285                 ret = -ENOMEM;
1286                 goto out;
1287         }
1288         if (!copy_from_user(proc, (void __user *)attr->addr,
1289                             sizeof(*proc))) {
1290                 kvm->arch.model.cpuid = proc->cpuid;
1291                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292                 unblocked_ibc = sclp.ibc & 0xfff;
1293                 if (lowest_ibc && proc->ibc) {
1294                         if (proc->ibc > unblocked_ibc)
1295                                 kvm->arch.model.ibc = unblocked_ibc;
1296                         else if (proc->ibc < lowest_ibc)
1297                                 kvm->arch.model.ibc = lowest_ibc;
1298                         else
1299                                 kvm->arch.model.ibc = proc->ibc;
1300                 }
1301                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1303                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304                          kvm->arch.model.ibc,
1305                          kvm->arch.model.cpuid);
1306                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307                          kvm->arch.model.fac_list[0],
1308                          kvm->arch.model.fac_list[1],
1309                          kvm->arch.model.fac_list[2]);
1310         } else
1311                 ret = -EFAULT;
1312         kfree(proc);
1313 out:
1314         mutex_unlock(&kvm->lock);
1315         return ret;
1316 }
1317
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319                                        struct kvm_device_attr *attr)
1320 {
1321         struct kvm_s390_vm_cpu_feat data;
1322
1323         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1324                 return -EFAULT;
1325         if (!bitmap_subset((unsigned long *) data.feat,
1326                            kvm_s390_available_cpu_feat,
1327                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1328                 return -EINVAL;
1329
1330         mutex_lock(&kvm->lock);
1331         if (kvm->created_vcpus) {
1332                 mutex_unlock(&kvm->lock);
1333                 return -EBUSY;
1334         }
1335         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1336                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1337         mutex_unlock(&kvm->lock);
1338         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1339                          data.feat[0],
1340                          data.feat[1],
1341                          data.feat[2]);
1342         return 0;
1343 }
1344
1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1346                                           struct kvm_device_attr *attr)
1347 {
1348         mutex_lock(&kvm->lock);
1349         if (kvm->created_vcpus) {
1350                 mutex_unlock(&kvm->lock);
1351                 return -EBUSY;
1352         }
1353
1354         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1355                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1356                 mutex_unlock(&kvm->lock);
1357                 return -EFAULT;
1358         }
1359         mutex_unlock(&kvm->lock);
1360
1361         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1366         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1369         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1375         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1378         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1381         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1384         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1387         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1390         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1392                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1393         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1394                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1395                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1396         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1397                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1398                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1399         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1400                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1401                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1402         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1403                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1404                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1405         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1406                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1407                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1408         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1410                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1411                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1412                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1413         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1415                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1416                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1417                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1418
1419         return 0;
1420 }
1421
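/*
 * Dispatch KVM_S390_VM_CPU_MODEL "set" attributes to the processor, feature
 * and subfunction handlers above; unknown attributes yield -ENXIO.
 */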
1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1423 {
1424         int ret = -ENXIO;
1425
1426         switch (attr->attr) {
1427         case KVM_S390_VM_CPU_PROCESSOR:
1428                 ret = kvm_s390_set_processor(kvm, attr);
1429                 break;
1430         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1431                 ret = kvm_s390_set_processor_feat(kvm, attr);
1432                 break;
1433         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1434                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1435                 break;
1436         }
1437         return ret;
1438 }
1439
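/* Copy the guest cpu model (cpuid, ibc and facility list) to user space. */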
1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442         struct kvm_s390_vm_cpu_processor *proc;
1443         int ret = 0;
1444
1445         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1446         if (!proc) {
1447                 ret = -ENOMEM;
1448                 goto out;
1449         }
1450         proc->cpuid = kvm->arch.model.cpuid;
1451         proc->ibc = kvm->arch.model.ibc;
1452         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1453                S390_ARCH_FAC_LIST_SIZE_BYTE);
1454         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455                  kvm->arch.model.ibc,
1456                  kvm->arch.model.cpuid);
1457         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458                  kvm->arch.model.fac_list[0],
1459                  kvm->arch.model.fac_list[1],
1460                  kvm->arch.model.fac_list[2]);
1461         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1462                 ret = -EFAULT;
1463         kfree(proc);
1464 out:
1465         return ret;
1466 }
1467
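/*
 * Report the host machine properties: cpuid, ibc, the facility mask used for
 * this VM and the full host facility list (STFLE).
 */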
1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1469 {
1470         struct kvm_s390_vm_cpu_machine *mach;
1471         int ret = 0;
1472
1473         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1474         if (!mach) {
1475                 ret = -ENOMEM;
1476                 goto out;
1477         }
1478         get_cpu_id((struct cpuid *) &mach->cpuid);
1479         mach->ibc = sclp.ibc;
1480         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1481                S390_ARCH_FAC_LIST_SIZE_BYTE);
1482         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1483                sizeof(stfle_fac_list));
1484         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1485                  kvm->arch.model.ibc,
1486                  kvm->arch.model.cpuid);
1487         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1488                  mach->fac_mask[0],
1489                  mach->fac_mask[1],
1490                  mach->fac_mask[2]);
1491         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1492                  mach->fac_list[0],
1493                  mach->fac_list[1],
1494                  mach->fac_list[2]);
1495         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1496                 ret = -EFAULT;
1497         kfree(mach);
1498 out:
1499         return ret;
1500 }
1501
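/* Copy the CPU features currently enabled for this guest to user space. */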
1502 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1503                                        struct kvm_device_attr *attr)
1504 {
1505         struct kvm_s390_vm_cpu_feat data;
1506
1507         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1508                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1509         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1510                 return -EFAULT;
1511         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1512                          data.feat[0],
1513                          data.feat[1],
1514                          data.feat[2]);
1515         return 0;
1516 }
1517
1518 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1519                                      struct kvm_device_attr *attr)
1520 {
1521         struct kvm_s390_vm_cpu_feat data;
1522
1523         bitmap_copy((unsigned long *) data.feat,
1524                     kvm_s390_available_cpu_feat,
1525                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1526         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1527                 return -EFAULT;
1528         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1529                          data.feat[0],
1530                          data.feat[1],
1531                          data.feat[2]);
1532         return 0;
1533 }
1534
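/* Copy the instruction subfunction blocks configured for this guest to user space. */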
1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1536                                           struct kvm_device_attr *attr)
1537 {
1538         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1539             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1540                 return -EFAULT;
1541
1542         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1547         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1550         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1556         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1559         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1562         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1565         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1568         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1571         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1573                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1574         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1575                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1576                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1577         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1578                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1579                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1580         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1581                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1582                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1583         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1584                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1585                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1586         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1587                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1588                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1589         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1591                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1592                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1593                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1594         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1596                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1597                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1598                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1599
1600         return 0;
1601 }
1602
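/* Copy the instruction subfunction blocks supported by the host to user space. */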
1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1604                                         struct kvm_device_attr *attr)
1605 {
1606         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1607             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1608                 return -EFAULT;
1609
1610         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1612                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1615         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1618         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1624         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1625                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1626                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1627         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1628                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1630         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1631                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1632                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1633         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1634                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1635                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1636         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1637                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1639         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1640                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1641                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1642         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1643                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1644                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1645         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1646                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1647                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1648         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1649                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1650                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1651         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1652                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1653                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1654         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1655                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1656                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1657         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1659                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1660                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1661                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1662         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1664                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1665                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1666                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1667
1668         return 0;
1669 }
1670
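/*
 * Dispatch KVM_S390_VM_CPU_MODEL "get" attributes to the processor/machine
 * handlers above; unknown attributes yield -ENXIO.
 */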
1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673         int ret = -ENXIO;
1674
1675         switch (attr->attr) {
1676         case KVM_S390_VM_CPU_PROCESSOR:
1677                 ret = kvm_s390_get_processor(kvm, attr);
1678                 break;
1679         case KVM_S390_VM_CPU_MACHINE:
1680                 ret = kvm_s390_get_machine(kvm, attr);
1681                 break;
1682         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1683                 ret = kvm_s390_get_processor_feat(kvm, attr);
1684                 break;
1685         case KVM_S390_VM_CPU_MACHINE_FEAT:
1686                 ret = kvm_s390_get_machine_feat(kvm, attr);
1687                 break;
1688         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1689                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1690                 break;
1691         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1692                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1693                 break;
1694         }
1695         return ret;
1696 }
1697
1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1699 {
1700         int ret;
1701
1702         switch (attr->group) {
1703         case KVM_S390_VM_MEM_CTRL:
1704                 ret = kvm_s390_set_mem_control(kvm, attr);
1705                 break;
1706         case KVM_S390_VM_TOD:
1707                 ret = kvm_s390_set_tod(kvm, attr);
1708                 break;
1709         case KVM_S390_VM_CPU_MODEL:
1710                 ret = kvm_s390_set_cpu_model(kvm, attr);
1711                 break;
1712         case KVM_S390_VM_CRYPTO:
1713                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1714                 break;
1715         case KVM_S390_VM_MIGRATION:
1716                 ret = kvm_s390_vm_set_migration(kvm, attr);
1717                 break;
1718         default:
1719                 ret = -ENXIO;
1720                 break;
1721         }
1722
1723         return ret;
1724 }
1725
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728         int ret;
1729
1730         switch (attr->group) {
1731         case KVM_S390_VM_MEM_CTRL:
1732                 ret = kvm_s390_get_mem_control(kvm, attr);
1733                 break;
1734         case KVM_S390_VM_TOD:
1735                 ret = kvm_s390_get_tod(kvm, attr);
1736                 break;
1737         case KVM_S390_VM_CPU_MODEL:
1738                 ret = kvm_s390_get_cpu_model(kvm, attr);
1739                 break;
1740         case KVM_S390_VM_MIGRATION:
1741                 ret = kvm_s390_vm_get_migration(kvm, attr);
1742                 break;
1743         default:
1744                 ret = -ENXIO;
1745                 break;
1746         }
1747
1748         return ret;
1749 }
1750
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1752 {
1753         int ret;
1754
1755         switch (attr->group) {
1756         case KVM_S390_VM_MEM_CTRL:
1757                 switch (attr->attr) {
1758                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1759                 case KVM_S390_VM_MEM_CLR_CMMA:
1760                         ret = sclp.has_cmma ? 0 : -ENXIO;
1761                         break;
1762                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1763                         ret = 0;
1764                         break;
1765                 default:
1766                         ret = -ENXIO;
1767                         break;
1768                 }
1769                 break;
1770         case KVM_S390_VM_TOD:
1771                 switch (attr->attr) {
1772                 case KVM_S390_VM_TOD_LOW:
1773                 case KVM_S390_VM_TOD_HIGH:
1774                         ret = 0;
1775                         break;
1776                 default:
1777                         ret = -ENXIO;
1778                         break;
1779                 }
1780                 break;
1781         case KVM_S390_VM_CPU_MODEL:
1782                 switch (attr->attr) {
1783                 case KVM_S390_VM_CPU_PROCESSOR:
1784                 case KVM_S390_VM_CPU_MACHINE:
1785                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1787                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789                         ret = 0;
1790                         break;
1791                 default:
1792                         ret = -ENXIO;
1793                         break;
1794                 }
1795                 break;
1796         case KVM_S390_VM_CRYPTO:
1797                 switch (attr->attr) {
1798                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1802                         ret = 0;
1803                         break;
1804                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1806                         ret = ap_instructions_available() ? 0 : -ENXIO;
1807                         break;
1808                 default:
1809                         ret = -ENXIO;
1810                         break;
1811                 }
1812                 break;
1813         case KVM_S390_VM_MIGRATION:
1814                 ret = 0;
1815                 break;
1816         default:
1817                 ret = -ENXIO;
1818                 break;
1819         }
1820
1821         return ret;
1822 }
1823
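/*
 * Read the guest storage keys for args->count frames starting at
 * args->start_gfn and copy them to user space.
 */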
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1825 {
1826         uint8_t *keys;
1827         uint64_t hva;
1828         int srcu_idx, i, r = 0;
1829
1830         if (args->flags != 0)
1831                 return -EINVAL;
1832
1833         /* Is this guest using storage keys? */
1834         if (!mm_uses_skeys(current->mm))
1835                 return KVM_S390_GET_SKEYS_NONE;
1836
1837         /* Enforce sane limit on memory allocation */
1838         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1839                 return -EINVAL;
1840
1841         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1842         if (!keys)
1843                 return -ENOMEM;
1844
1845         mmap_read_lock(current->mm);
1846         srcu_idx = srcu_read_lock(&kvm->srcu);
1847         for (i = 0; i < args->count; i++) {
1848                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1849                 if (kvm_is_error_hva(hva)) {
1850                         r = -EFAULT;
1851                         break;
1852                 }
1853
1854                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1855                 if (r)
1856                         break;
1857         }
1858         srcu_read_unlock(&kvm->srcu, srcu_idx);
1859         mmap_read_unlock(current->mm);
1860
1861         if (!r) {
1862                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863                                  sizeof(uint8_t) * args->count);
1864                 if (r)
1865                         r = -EFAULT;
1866         }
1867
1868         kvfree(keys);
1869         return r;
1870 }
1871
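/*
 * Copy storage keys from user space and apply them to the guest frames
 * starting at args->start_gfn, enabling storage key handling if necessary.
 */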
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1873 {
1874         uint8_t *keys;
1875         uint64_t hva;
1876         int srcu_idx, i, r = 0;
1877         bool unlocked;
1878
1879         if (args->flags != 0)
1880                 return -EINVAL;
1881
1882         /* Enforce sane limit on memory allocation */
1883         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1884                 return -EINVAL;
1885
1886         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1887         if (!keys)
1888                 return -ENOMEM;
1889
1890         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891                            sizeof(uint8_t) * args->count);
1892         if (r) {
1893                 r = -EFAULT;
1894                 goto out;
1895         }
1896
1897         /* Enable storage key handling for the guest */
1898         r = s390_enable_skey();
1899         if (r)
1900                 goto out;
1901
1902         i = 0;
1903         mmap_read_lock(current->mm);
1904         srcu_idx = srcu_read_lock(&kvm->srcu);
1905         while (i < args->count) {
1906                 unlocked = false;
1907                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1908                 if (kvm_is_error_hva(hva)) {
1909                         r = -EFAULT;
1910                         break;
1911                 }
1912
1913                 /* Lowest order bit is reserved */
1914                 if (keys[i] & 0x01) {
1915                         r = -EINVAL;
1916                         break;
1917                 }
1918
1919                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1920                 if (r) {
1921                         r = fixup_user_fault(current->mm, hva,
1922                                              FAULT_FLAG_WRITE, &unlocked);
1923                         if (r)
1924                                 break;
1925                 }
1926                 if (!r)
1927                         i++;
1928         }
1929         srcu_read_unlock(&kvm->srcu, srcu_idx);
1930         mmap_read_unlock(current->mm);
1931 out:
1932         kvfree(keys);
1933         return r;
1934 }
1935
1936 /*
1937  * Base address and length must be sent at the start of each block, therefore
1938  * it's cheaper to send some clean data, as long as it's less than the size of
1939  * two longs.
1940  */
1941 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1942 /* for consistency */
1943 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1944
1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1946                               u8 *res, unsigned long bufsize)
1947 {
1948         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1949
1950         args->count = 0;
1951         while (args->count < bufsize) {
1952                 hva = gfn_to_hva(kvm, cur_gfn);
1953                 /*
1954                  * We return an error if the first value was invalid, but we
1955                  * return successfully if at least one value was copied.
1956                  */
1957                 if (kvm_is_error_hva(hva))
1958                         return args->count ? 0 : -EFAULT;
1959                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1960                         pgstev = 0;
1961                 res[args->count++] = (pgstev >> 24) & 0x43;
1962                 cur_gfn++;
1963         }
1964
1965         return 0;
1966 }
1967
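/*
 * Like gfn_to_memslot(), but with the "approximate" flag set, so a nearby
 * memslot can be returned when the gfn itself is not backed by any slot.
 */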
1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1969                                                      gfn_t gfn)
1970 {
1971         return ____gfn_to_memslot(slots, gfn, true);
1972 }
1973
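/*
 * Find the guest frame number of the next page whose CMMA dirty bit is set,
 * starting at cur_gfn and wrapping around to the lowest memslot if needed.
 */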
1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1975                                               unsigned long cur_gfn)
1976 {
1977         struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1978         unsigned long ofs = cur_gfn - ms->base_gfn;
1979         struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
1980
1981         if (ms->base_gfn + ms->npages <= cur_gfn) {
1982                 mnode = rb_next(mnode);
1983                 /* If we are above the highest slot, wrap around */
1984                 if (!mnode)
1985                         mnode = rb_first(&slots->gfn_tree);
1986
1987                 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1988                 ofs = 0;
1989         }
1990         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1991         while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
1992                 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1993                 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
1994         }
1995         return ms->base_gfn + ofs;
1996 }
1997
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999                              u8 *res, unsigned long bufsize)
2000 {
2001         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002         struct kvm_memslots *slots = kvm_memslots(kvm);
2003         struct kvm_memory_slot *ms;
2004
2005         if (unlikely(kvm_memslots_empty(slots)))
2006                 return 0;
2007
2008         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009         ms = gfn_to_memslot(kvm, cur_gfn);
2010         args->count = 0;
2011         args->start_gfn = cur_gfn;
2012         if (!ms)
2013                 return 0;
2014         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2015         mem_end = kvm_s390_get_gfn_end(slots);
2016
2017         while (args->count < bufsize) {
2018                 hva = gfn_to_hva(kvm, cur_gfn);
2019                 if (kvm_is_error_hva(hva))
2020                         return 0;
2021                 /* Decrement only if we actually flipped the bit to 0 */
2022                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2025                         pgstev = 0;
2026                 /* Save the value */
2027                 res[args->count++] = (pgstev >> 24) & 0x43;
2028                 /* If the next bit is too far away, stop. */
2029                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2030                         return 0;
2031                 /* If we reached the previous "next", find the next one */
2032                 if (cur_gfn == next_gfn)
2033                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034                 /* Reached the end of memory or of the buffer, stop */
2035                 if ((next_gfn >= mem_end) ||
2036                     (next_gfn - args->start_gfn >= bufsize))
2037                         return 0;
2038                 cur_gfn++;
2039                 /* Reached the end of the current memslot, take the next one. */
2040                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2041                         ms = gfn_to_memslot(kvm, cur_gfn);
2042                         if (!ms)
2043                                 return 0;
2044                 }
2045         }
2046         return 0;
2047 }
2048
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058                                   struct kvm_s390_cmma_log *args)
2059 {
2060         unsigned long bufsize;
2061         int srcu_idx, peek, ret;
2062         u8 *values;
2063
2064         if (!kvm->arch.use_cmma)
2065                 return -ENXIO;
2066         /* Invalid/unsupported flags were specified */
2067         if (args->flags & ~KVM_S390_CMMA_PEEK)
2068                 return -EINVAL;
2069         /* Migration mode query, and we are not doing a migration */
2070         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071         if (!peek && !kvm->arch.migration_mode)
2072                 return -EINVAL;
2073         /* CMMA is disabled or was not used, or the buffer has length zero */
2074         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075         if (!bufsize || !kvm->mm->context.uses_cmm) {
2076                 memset(args, 0, sizeof(*args));
2077                 return 0;
2078         }
2079         /* We are not peeking, and there are no dirty pages */
2080         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081                 memset(args, 0, sizeof(*args));
2082                 return 0;
2083         }
2084
2085         values = vmalloc(bufsize);
2086         if (!values)
2087                 return -ENOMEM;
2088
2089         mmap_read_lock(kvm->mm);
2090         srcu_idx = srcu_read_lock(&kvm->srcu);
2091         if (peek)
2092                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093         else
2094                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095         srcu_read_unlock(&kvm->srcu, srcu_idx);
2096         mmap_read_unlock(kvm->mm);
2097
2098         if (kvm->arch.migration_mode)
2099                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100         else
2101                 args->remaining = 0;
2102
2103         if (copy_to_user((void __user *)args->values, values, args->count))
2104                 ret = -EFAULT;
2105
2106         vfree(values);
2107         return ret;
2108 }
2109
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116                                   const struct kvm_s390_cmma_log *args)
2117 {
2118         unsigned long hva, mask, pgstev, i;
2119         uint8_t *bits;
2120         int srcu_idx, r = 0;
2121
2122         mask = args->mask;
2123
2124         if (!kvm->arch.use_cmma)
2125                 return -ENXIO;
2126         /* invalid/unsupported flags */
2127         if (args->flags != 0)
2128                 return -EINVAL;
2129         /* Enforce sane limit on memory allocation */
2130         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131                 return -EINVAL;
2132         /* Nothing to do */
2133         if (args->count == 0)
2134                 return 0;
2135
2136         bits = vmalloc(array_size(sizeof(*bits), args->count));
2137         if (!bits)
2138                 return -ENOMEM;
2139
2140         r = copy_from_user(bits, (void __user *)args->values, args->count);
2141         if (r) {
2142                 r = -EFAULT;
2143                 goto out;
2144         }
2145
2146         mmap_read_lock(kvm->mm);
2147         srcu_idx = srcu_read_lock(&kvm->srcu);
2148         for (i = 0; i < args->count; i++) {
2149                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2150                 if (kvm_is_error_hva(hva)) {
2151                         r = -EFAULT;
2152                         break;
2153                 }
2154
2155                 pgstev = bits[i];
2156                 pgstev = pgstev << 24;
2157                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159         }
2160         srcu_read_unlock(&kvm->srcu, srcu_idx);
2161         mmap_read_unlock(kvm->mm);
2162
2163         if (!kvm->mm->context.uses_cmm) {
2164                 mmap_write_lock(kvm->mm);
2165                 kvm->mm->context.uses_cmm = 1;
2166                 mmap_write_unlock(kvm->mm);
2167         }
2168 out:
2169         vfree(bits);
2170         return r;
2171 }
2172
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175         struct kvm_vcpu *vcpu;
2176         u16 rc, rrc;
2177         int ret = 0;
2178         unsigned long i;
2179
2180         /*
2181          * We ignore failures and try to destroy as many CPUs as possible.
2182          * At the same time we must not free the assigned resources when
2183          * this fails, as the ultravisor still has access to that memory.
2184          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185          * behind.
2186          * We want to return the first failure rc and rrc, though.
2187          */
2188         kvm_for_each_vcpu(i, vcpu, kvm) {
2189                 mutex_lock(&vcpu->mutex);
2190                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191                         *rcp = rc;
2192                         *rrcp = rrc;
2193                         ret = -EIO;
2194                 }
2195                 mutex_unlock(&vcpu->mutex);
2196         }
2197         return ret;
2198 }
2199
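/* Convert all vcpus to protected; on failure undo the conversion again. */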
2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2201 {
2202         unsigned long i;
2203         int r = 0;
2204         u16 dummy;
2205
2206         struct kvm_vcpu *vcpu;
2207
2208         kvm_for_each_vcpu(i, vcpu, kvm) {
2209                 mutex_lock(&vcpu->mutex);
2210                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2211                 mutex_unlock(&vcpu->mutex);
2212                 if (r)
2213                         break;
2214         }
2215         if (r)
2216                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2217         return r;
2218 }
2219
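/*
 * Handle the KVM_S390_PV_COMMAND sub-commands that move a VM into and out of
 * protected mode and drive the ultravisor (set parms, unpack, verify, reset,
 * unshare).
 */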
2220 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2221 {
2222         int r = 0;
2223         u16 dummy;
2224         void __user *argp = (void __user *)cmd->data;
2225
2226         switch (cmd->cmd) {
2227         case KVM_PV_ENABLE: {
2228                 r = -EINVAL;
2229                 if (kvm_s390_pv_is_protected(kvm))
2230                         break;
2231
2232                 /*
2233                  * FMT 4 SIE needs esca. As we never switch back to bsca from
2234                  * esca, no cleanup is needed in the error cases below.
2235                  */
2236                 r = sca_switch_to_extended(kvm);
2237                 if (r)
2238                         break;
2239
2240                 mmap_write_lock(current->mm);
2241                 r = gmap_mark_unmergeable();
2242                 mmap_write_unlock(current->mm);
2243                 if (r)
2244                         break;
2245
2246                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2247                 if (r)
2248                         break;
2249
2250                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2251                 if (r)
2252                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2253
2254                 /* we need to block service interrupts from now on */
2255                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2256                 break;
2257         }
2258         case KVM_PV_DISABLE: {
2259                 r = -EINVAL;
2260                 if (!kvm_s390_pv_is_protected(kvm))
2261                         break;
2262
2263                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2264                 /*
2265                  * If a CPU could not be destroyed, destroying the VM will also
2266                  * fail, so there is no point in trying to do so. Instead, return
2267                  * the rc and rrc from the first CPU whose destruction failed.
2268                  */
2269                 if (r)
2270                         break;
2271                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2272
2273                 /* no need to block service interrupts any more */
2274                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2275                 break;
2276         }
2277         case KVM_PV_SET_SEC_PARMS: {
2278                 struct kvm_s390_pv_sec_parm parms = {};
2279                 void *hdr;
2280
2281                 r = -EINVAL;
2282                 if (!kvm_s390_pv_is_protected(kvm))
2283                         break;
2284
2285                 r = -EFAULT;
2286                 if (copy_from_user(&parms, argp, sizeof(parms)))
2287                         break;
2288
2289                 /* Currently restricted to 8KB */
2290                 r = -EINVAL;
2291                 if (parms.length > PAGE_SIZE * 2)
2292                         break;
2293
2294                 r = -ENOMEM;
2295                 hdr = vmalloc(parms.length);
2296                 if (!hdr)
2297                         break;
2298
2299                 r = -EFAULT;
2300                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2301                                     parms.length))
2302                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2303                                                       &cmd->rc, &cmd->rrc);
2304
2305                 vfree(hdr);
2306                 break;
2307         }
2308         case KVM_PV_UNPACK: {
2309                 struct kvm_s390_pv_unp unp = {};
2310
2311                 r = -EINVAL;
2312                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2313                         break;
2314
2315                 r = -EFAULT;
2316                 if (copy_from_user(&unp, argp, sizeof(unp)))
2317                         break;
2318
2319                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2320                                        &cmd->rc, &cmd->rrc);
2321                 break;
2322         }
2323         case KVM_PV_VERIFY: {
2324                 r = -EINVAL;
2325                 if (!kvm_s390_pv_is_protected(kvm))
2326                         break;
2327
2328                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2329                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2330                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2331                              cmd->rrc);
2332                 break;
2333         }
2334         case KVM_PV_PREP_RESET: {
2335                 r = -EINVAL;
2336                 if (!kvm_s390_pv_is_protected(kvm))
2337                         break;
2338
2339                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2340                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2341                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2342                              cmd->rc, cmd->rrc);
2343                 break;
2344         }
2345         case KVM_PV_UNSHARE_ALL: {
2346                 r = -EINVAL;
2347                 if (!kvm_s390_pv_is_protected(kvm))
2348                         break;
2349
2350                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2352                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2353                              cmd->rc, cmd->rrc);
2354                 break;
2355         }
2356         default:
2357                 r = -ENOTTY;
2358         }
2359         return r;
2360 }
2361
2362 static bool access_key_invalid(u8 access_key)
2363 {
2364         return access_key > 0xf;
2365 }
2366
2367 long kvm_arch_vm_ioctl(struct file *filp,
2368                        unsigned int ioctl, unsigned long arg)
2369 {
2370         struct kvm *kvm = filp->private_data;
2371         void __user *argp = (void __user *)arg;
2372         struct kvm_device_attr attr;
2373         int r;
2374
2375         switch (ioctl) {
2376         case KVM_S390_INTERRUPT: {
2377                 struct kvm_s390_interrupt s390int;
2378
2379                 r = -EFAULT;
2380                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2381                         break;
2382                 r = kvm_s390_inject_vm(kvm, &s390int);
2383                 break;
2384         }
2385         case KVM_CREATE_IRQCHIP: {
2386                 struct kvm_irq_routing_entry routing;
2387
2388                 r = -EINVAL;
2389                 if (kvm->arch.use_irqchip) {
2390                         /* Set up dummy routing. */
2391                         memset(&routing, 0, sizeof(routing));
2392                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2393                 }
2394                 break;
2395         }
2396         case KVM_SET_DEVICE_ATTR: {
2397                 r = -EFAULT;
2398                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2399                         break;
2400                 r = kvm_s390_vm_set_attr(kvm, &attr);
2401                 break;
2402         }
2403         case KVM_GET_DEVICE_ATTR: {
2404                 r = -EFAULT;
2405                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2406                         break;
2407                 r = kvm_s390_vm_get_attr(kvm, &attr);
2408                 break;
2409         }
2410         case KVM_HAS_DEVICE_ATTR: {
2411                 r = -EFAULT;
2412                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2413                         break;
2414                 r = kvm_s390_vm_has_attr(kvm, &attr);
2415                 break;
2416         }
2417         case KVM_S390_GET_SKEYS: {
2418                 struct kvm_s390_skeys args;
2419
2420                 r = -EFAULT;
2421                 if (copy_from_user(&args, argp,
2422                                    sizeof(struct kvm_s390_skeys)))
2423                         break;
2424                 r = kvm_s390_get_skeys(kvm, &args);
2425                 break;
2426         }
2427         case KVM_S390_SET_SKEYS: {
2428                 struct kvm_s390_skeys args;
2429
2430                 r = -EFAULT;
2431                 if (copy_from_user(&args, argp,
2432                                    sizeof(struct kvm_s390_skeys)))
2433                         break;
2434                 r = kvm_s390_set_skeys(kvm, &args);
2435                 break;
2436         }
2437         case KVM_S390_GET_CMMA_BITS: {
2438                 struct kvm_s390_cmma_log args;
2439
2440                 r = -EFAULT;
2441                 if (copy_from_user(&args, argp, sizeof(args)))
2442                         break;
2443                 mutex_lock(&kvm->slots_lock);
2444                 r = kvm_s390_get_cmma_bits(kvm, &args);
2445                 mutex_unlock(&kvm->slots_lock);
2446                 if (!r) {
2447                         r = copy_to_user(argp, &args, sizeof(args));
2448                         if (r)
2449                                 r = -EFAULT;
2450                 }
2451                 break;
2452         }
2453         case KVM_S390_SET_CMMA_BITS: {
2454                 struct kvm_s390_cmma_log args;
2455
2456                 r = -EFAULT;
2457                 if (copy_from_user(&args, argp, sizeof(args)))
2458                         break;
2459                 mutex_lock(&kvm->slots_lock);
2460                 r = kvm_s390_set_cmma_bits(kvm, &args);
2461                 mutex_unlock(&kvm->slots_lock);
2462                 break;
2463         }
2464         case KVM_S390_PV_COMMAND: {
2465                 struct kvm_pv_cmd args;
2466
2467                 /* using protected virtualization implies user-controlled cpu state */
2468                 kvm_s390_set_user_cpu_state_ctrl(kvm);
2469                 r = 0;
2470                 if (!is_prot_virt_host()) {
2471                         r = -EINVAL;
2472                         break;
2473                 }
2474                 if (copy_from_user(&args, argp, sizeof(args))) {
2475                         r = -EFAULT;
2476                         break;
2477                 }
2478                 if (args.flags) {
2479                         r = -EINVAL;
2480                         break;
2481                 }
2482                 mutex_lock(&kvm->lock);
2483                 r = kvm_s390_handle_pv(kvm, &args);
2484                 mutex_unlock(&kvm->lock);
2485                 if (copy_to_user(argp, &args, sizeof(args))) {
2486                         r = -EFAULT;
2487                         break;
2488                 }
2489                 break;
2490         }
2491         default:
2492                 r = -ENOTTY;
2493         }
2494
2495         return r;
2496 }
2497
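/* Query the AP configuration to see whether the APXA facility is installed. */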
2498 static int kvm_s390_apxa_installed(void)
2499 {
2500         struct ap_config_info info;
2501
2502         if (ap_instructions_available()) {
2503                 if (ap_qci(&info) == 0)
2504                         return info.apxa;
2505         }
2506
2507         return 0;
2508 }
2509
2510 /*
2511  * The format of the crypto control block (CRYCB) is specified in the 3 low
2512  * order bits of the CRYCB designation (CRYCBD) field as follows:
2513  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2514  *           AP extended addressing (APXA) facility is installed.
2515  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2516  * Format 2: Both the APXA and MSAX3 facilities are installed.
2517  */
2518 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2519 {
2520         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2521
2522         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2523         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2524
2525         /* Check whether MSAX3 is installed */
2526         if (!test_kvm_facility(kvm, 76))
2527                 return;
2528
2529         if (kvm_s390_apxa_installed())
2530                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2531         else
2532                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2533 }
2534
2535 /*
2536  * kvm_arch_crypto_set_masks
2537  *
2538  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2539  *       to be set.
2540  * @apm: the mask identifying the accessible AP adapters
2541  * @aqm: the mask identifying the accessible AP domains
2542  * @adm: the mask identifying the accessible AP control domains
2543  *
2544  * Set the masks that identify the adapters, domains and control domains to
2545  * which the KVM guest is granted access.
2546  *
2547  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2548  *       function.
2549  */
2550 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2551                                unsigned long *aqm, unsigned long *adm)
2552 {
2553         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2554
2555         kvm_s390_vcpu_block_all(kvm);
2556
2557         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2558         case CRYCB_FORMAT2: /* APCB1 uses 256 bits */
2559                 memcpy(crycb->apcb1.apm, apm, 32);
2560                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2561                          apm[0], apm[1], apm[2], apm[3]);
2562                 memcpy(crycb->apcb1.aqm, aqm, 32);
2563                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2564                          aqm[0], aqm[1], aqm[2], aqm[3]);
2565                 memcpy(crycb->apcb1.adm, adm, 32);
2566                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2567                          adm[0], adm[1], adm[2], adm[3]);
2568                 break;
2569         case CRYCB_FORMAT1:
2570         case CRYCB_FORMAT0: /* Fall through - both use APCB0 */
2571                 memcpy(crycb->apcb0.apm, apm, 8);
2572                 memcpy(crycb->apcb0.aqm, aqm, 2);
2573                 memcpy(crycb->apcb0.adm, adm, 2);
2574                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2575                          apm[0], *((unsigned short *)aqm),
2576                          *((unsigned short *)adm));
2577                 break;
2578         default:        /* Cannot happen */
2579                 break;
2580         }
2581
2582         /* recreate the shadow crycb for each vcpu */
2583         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2584         kvm_s390_vcpu_unblock_all(kvm);
2585 }
2586 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2587
2588 /*
2589  * kvm_arch_crypto_clear_masks
2590  *
2591  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2592  *       to be cleared.
2593  *
2594  * Clear the masks that identify the adapters, domains and control domains to
2595  * which the KVM guest is granted access.
2596  *
2597  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2598  *       function.
2599  */
2600 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2601 {
2602         kvm_s390_vcpu_block_all(kvm);
2603
2604         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2605                sizeof(kvm->arch.crypto.crycb->apcb0));
2606         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2607                sizeof(kvm->arch.crypto.crycb->apcb1));
2608
2609         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2610         /* recreate the shadow crycb for each vcpu */
2611         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2612         kvm_s390_vcpu_unblock_all(kvm);
2613 }
2614 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2615
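/* Use the host cpuid as a base, but override the version field with 0xff. */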
2616 static u64 kvm_s390_get_initial_cpuid(void)
2617 {
2618         struct cpuid cpuid;
2619
2620         get_cpu_id(&cpuid);
2621         cpuid.version = 0xff;
2622         return *((u64 *) &cpuid);
2623 }
2624
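/*
 * Set up the CRYCB and its format and, if MSAX3 (facility 76) is available,
 * enable protected key functions with freshly generated wrapping key masks.
 */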
2625 static void kvm_s390_crypto_init(struct kvm *kvm)
2626 {
2627         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2628         kvm_s390_set_crycb_format(kvm);
2629         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2630
2631         if (!test_kvm_facility(kvm, 76))
2632                 return;
2633
2634         /* Enable AES/DEA protected key functions by default */
2635         kvm->arch.crypto.aes_kw = 1;
2636         kvm->arch.crypto.dea_kw = 1;
2637         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2638                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2639         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2640                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2641 }
2642
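/* Free the basic or extended system control area (SCA). */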
2643 static void sca_dispose(struct kvm *kvm)
2644 {
2645         if (kvm->arch.use_esca)
2646                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2647         else
2648                 free_page((unsigned long)(kvm->arch.sca));
2649         kvm->arch.sca = NULL;
2650 }
2651
2652 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2653 {
2654         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2655         int i, rc;
2656         char debug_name[16];
2657         static unsigned long sca_offset;
2658
2659         rc = -EINVAL;
2660 #ifdef CONFIG_KVM_S390_UCONTROL
2661         if (type & ~KVM_VM_S390_UCONTROL)
2662                 goto out_err;
2663         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2664                 goto out_err;
2665 #else
2666         if (type)
2667                 goto out_err;
2668 #endif
2669
2670         rc = s390_enable_sie();
2671         if (rc)
2672                 goto out_err;
2673
2674         rc = -ENOMEM;
2675
2676         if (!sclp.has_64bscao)
2677                 alloc_flags |= GFP_DMA;
2678         rwlock_init(&kvm->arch.sca_lock);
2679         /* start with basic SCA */
2680         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2681         if (!kvm->arch.sca)
2682                 goto out_err;
2683         mutex_lock(&kvm_lock);
2684         sca_offset += 16;
2685         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2686                 sca_offset = 0;
2687         kvm->arch.sca = (struct bsca_block *)
2688                         ((char *) kvm->arch.sca + sca_offset);
2689         mutex_unlock(&kvm_lock);
2690
2691         sprintf(debug_name, "kvm-%u", current->pid);
2692
2693         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2694         if (!kvm->arch.dbf)
2695                 goto out_err;
2696
2697         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2698         kvm->arch.sie_page2 =
2699              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2700         if (!kvm->arch.sie_page2)
2701                 goto out_err;
2702
2703         kvm->arch.sie_page2->kvm = kvm;
2704         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2705
2706         for (i = 0; i < kvm_s390_fac_size(); i++) {
2707                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2708                                               (kvm_s390_fac_base[i] |
2709                                                kvm_s390_fac_ext[i]);
2710                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2711                                               kvm_s390_fac_base[i];
2712         }
2713         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2714
2715         /* we are always in czam mode - even on pre-z14 machines */
2716         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2717         set_kvm_facility(kvm->arch.model.fac_list, 138);
2718         /* we emulate STHYI in kvm */
2719         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2720         set_kvm_facility(kvm->arch.model.fac_list, 74);
2721         if (MACHINE_HAS_TLB_GUEST) {
2722                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2723                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2724         }
2725
2726         if (css_general_characteristics.aiv && test_facility(65))
2727                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2728
2729         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2730         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2731
2732         kvm_s390_crypto_init(kvm);
2733
2734         mutex_init(&kvm->arch.float_int.ais_lock);
2735         spin_lock_init(&kvm->arch.float_int.lock);
2736         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2737                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2738         init_waitqueue_head(&kvm->arch.ipte_wq);
2739         mutex_init(&kvm->arch.ipte_mutex);
2740
2741         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2742         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2743
2744         if (type & KVM_VM_S390_UCONTROL) {
2745                 kvm->arch.gmap = NULL;
2746                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2747         } else {
2748                 if (sclp.hamax == U64_MAX)
2749                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2750                 else
2751                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2752                                                     sclp.hamax + 1);
2753                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2754                 if (!kvm->arch.gmap)
2755                         goto out_err;
2756                 kvm->arch.gmap->private = kvm;
2757                 kvm->arch.gmap->pfault_enabled = 0;
2758         }
2759
2760         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2761         kvm->arch.use_skf = sclp.has_skey;
2762         spin_lock_init(&kvm->arch.start_stop_lock);
2763         kvm_s390_vsie_init(kvm);
2764         if (use_gisa)
2765                 kvm_s390_gisa_init(kvm);
2766         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2767
2768         return 0;
2769 out_err:
2770         free_page((unsigned long)kvm->arch.sie_page2);
2771         debug_unregister(kvm->arch.dbf);
2772         sca_dispose(kvm);
2773         KVM_EVENT(3, "creation of vm failed: %d", rc);
2774         return rc;
2775 }
2776
2777 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2778 {
2779         u16 rc, rrc;
2780
2781         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2782         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2783         kvm_s390_clear_local_irqs(vcpu);
2784         kvm_clear_async_pf_completion_queue(vcpu);
2785         if (!kvm_is_ucontrol(vcpu->kvm))
2786                 sca_del_vcpu(vcpu);
2787
2788         if (kvm_is_ucontrol(vcpu->kvm))
2789                 gmap_remove(vcpu->arch.gmap);
2790
2791         if (vcpu->kvm->arch.use_cmma)
2792                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2793         /* We cannot hold the vcpu mutex here, we are already dying */
2794         if (kvm_s390_pv_cpu_get_handle(vcpu))
2795                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2796         free_page((unsigned long)(vcpu->arch.sie_block));
2797 }
2798
2799 void kvm_arch_destroy_vm(struct kvm *kvm)
2800 {
2801         u16 rc, rrc;
2802
2803         kvm_destroy_vcpus(kvm);
2804         sca_dispose(kvm);
2805         kvm_s390_gisa_destroy(kvm);
2806         /*
2807          * We are already at the end of life and kvm->lock is not taken.
2808          * This is ok as the file descriptor is closed by now and nobody
2809          * can mess with the pv state. To avoid lockdep_assert_held from
2810          * complaining, we do not use kvm_s390_pv_is_protected.
2811          */
2812         if (kvm_s390_pv_get_handle(kvm))
2813                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2814         debug_unregister(kvm->arch.dbf);
2815         free_page((unsigned long)kvm->arch.sie_page2);
2816         if (!kvm_is_ucontrol(kvm))
2817                 gmap_remove(kvm->arch.gmap);
2818         kvm_s390_destroy_adapters(kvm);
2819         kvm_s390_clear_float_irqs(kvm);
2820         kvm_s390_vsie_destroy(kvm);
2821         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2822 }
2823
2824 /* Section: vcpu related */
2825 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2826 {
2827         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2828         if (!vcpu->arch.gmap)
2829                 return -ENOMEM;
2830         vcpu->arch.gmap->private = vcpu->kvm;
2831
2832         return 0;
2833 }
2834
2835 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2836 {
2837         if (!kvm_s390_use_sca_entries())
2838                 return;
2839         read_lock(&vcpu->kvm->arch.sca_lock);
2840         if (vcpu->kvm->arch.use_esca) {
2841                 struct esca_block *sca = vcpu->kvm->arch.sca;
2842
2843                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2844                 sca->cpu[vcpu->vcpu_id].sda = 0;
2845         } else {
2846                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2847
2848                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2849                 sca->cpu[vcpu->vcpu_id].sda = 0;
2850         }
2851         read_unlock(&vcpu->kvm->arch.sca_lock);
2852 }
2853
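/*
 * Publish a vcpu in the SCA: point the SIE block at the SCA origin and,
 * when SCA entries are in use, store the SIE block address in the vcpu's
 * slot and set the corresponding bit in the MCN mask.
 */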
2854 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2855 {
2856         if (!kvm_s390_use_sca_entries()) {
2857                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2858
2859                 /* we still need the basic sca for the ipte control */
2860                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2861                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2862                 return;
2863         }
2864         read_lock(&vcpu->kvm->arch.sca_lock);
2865         if (vcpu->kvm->arch.use_esca) {
2866                 struct esca_block *sca = vcpu->kvm->arch.sca;
2867
2868                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2869                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2870                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2871                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2872                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2873         } else {
2874                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2875
2876                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2877                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2878                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2879                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2880         }
2881         read_unlock(&vcpu->kvm->arch.sca_lock);
2882 }
2883
2884 /* Basic SCA to Extended SCA data copy routines */
2885 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2886 {
2887         d->sda = s->sda;
2888         d->sigp_ctrl.c = s->sigp_ctrl.c;
2889         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2890 }
2891
2892 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2893 {
2894         int i;
2895
2896         d->ipte_control = s->ipte_control;
2897         d->mcn[0] = s->mcn;
2898         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2899                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2900 }
2901
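/*
 * Replace the basic SCA by an extended one to allow more vcpus. All vcpus
 * are blocked and the sca_lock is held for writing while the entries are
 * copied over and every SIE block is repointed to the new SCA.
 */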
2902 static int sca_switch_to_extended(struct kvm *kvm)
2903 {
2904         struct bsca_block *old_sca = kvm->arch.sca;
2905         struct esca_block *new_sca;
2906         struct kvm_vcpu *vcpu;
2907         unsigned long vcpu_idx;
2908         u32 scaol, scaoh;
2909
2910         if (kvm->arch.use_esca)
2911                 return 0;
2912
2913         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2914         if (!new_sca)
2915                 return -ENOMEM;
2916
2917         scaoh = (u32)((u64)(new_sca) >> 32);
2918         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2919
2920         kvm_s390_vcpu_block_all(kvm);
2921         write_lock(&kvm->arch.sca_lock);
2922
2923         sca_copy_b_to_e(new_sca, old_sca);
2924
2925         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2926                 vcpu->arch.sie_block->scaoh = scaoh;
2927                 vcpu->arch.sie_block->scaol = scaol;
2928                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2929         }
2930         kvm->arch.sca = new_sca;
2931         kvm->arch.use_esca = 1;
2932
2933         write_unlock(&kvm->arch.sca_lock);
2934         kvm_s390_vcpu_unblock_all(kvm);
2935
2936         free_page((unsigned long)old_sca);
2937
2938         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2939                  old_sca, kvm->arch.sca);
2940         return 0;
2941 }
2942
2943 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2944 {
2945         int rc;
2946
2947         if (!kvm_s390_use_sca_entries()) {
2948                 if (id < KVM_MAX_VCPUS)
2949                         return true;
2950                 return false;
2951         }
2952         if (id < KVM_S390_BSCA_CPU_SLOTS)
2953                 return true;
2954         if (!sclp.has_esca || !sclp.has_64bscao)
2955                 return false;
2956
2957         mutex_lock(&kvm->lock);
2958         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2959         mutex_unlock(&kvm->lock);
2960
2961         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2962 }
2963
2964 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2965 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2966 {
2967         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2968         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2969         vcpu->arch.cputm_start = get_tod_clock_fast();
2970         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2971 }
2972
2973 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2974 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2975 {
2976         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2977         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2978         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2979         vcpu->arch.cputm_start = 0;
2980         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2981 }
2982
2983 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2984 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2985 {
2986         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2987         vcpu->arch.cputm_enabled = true;
2988         __start_cpu_timer_accounting(vcpu);
2989 }
2990
2991 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2992 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2993 {
2994         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2995         __stop_cpu_timer_accounting(vcpu);
2996         vcpu->arch.cputm_enabled = false;
2997 }
2998
2999 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3000 {
3001         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3002         __enable_cpu_timer_accounting(vcpu);
3003         preempt_enable();
3004 }
3005
3006 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3007 {
3008         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3009         __disable_cpu_timer_accounting(vcpu);
3010         preempt_enable();
3011 }
3012
3013 /* set the cpu timer - may only be called from the VCPU thread itself */
3014 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3015 {
3016         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3017         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3018         if (vcpu->arch.cputm_enabled)
3019                 vcpu->arch.cputm_start = get_tod_clock_fast();
3020         vcpu->arch.sie_block->cputm = cputm;
3021         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3022         preempt_enable();
3023 }
3024
3025 /* update and get the cpu timer - can also be called from other VCPU threads */
3026 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3027 {
3028         unsigned int seq;
3029         __u64 value;
3030
3031         if (unlikely(!vcpu->arch.cputm_enabled))
3032                 return vcpu->arch.sie_block->cputm;
3033
3034         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3035         do {
3036                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3037                 /*
3038                  * If the writer would ever execute a read in the critical
3039                  * section, e.g. in irq context, we have a deadlock.
3040                  */
3041                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3042                 value = vcpu->arch.sie_block->cputm;
3043                 /* if cputm_start is 0, accounting is being started/stopped */
3044                 if (likely(vcpu->arch.cputm_start))
3045                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3046         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3047         preempt_enable();
3048         return value;
3049 }
3050
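/*
 * Called when a vcpu is scheduled in on @cpu: enable the vcpu's gmap, mark
 * the vcpu as running and resume CPU timer accounting unless the vcpu is
 * idle. kvm_arch_vcpu_put() below undoes this on schedule-out.
 */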
3051 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3052 {
3054         gmap_enable(vcpu->arch.enabled_gmap);
3055         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3056         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3057                 __start_cpu_timer_accounting(vcpu);
3058         vcpu->cpu = cpu;
3059 }
3060
3061 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3062 {
3063         vcpu->cpu = -1;
3064         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3065                 __stop_cpu_timer_accounting(vcpu);
3066         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3067         vcpu->arch.enabled_gmap = gmap_get_enabled();
3068         gmap_disable(vcpu->arch.enabled_gmap);
3070 }
3071
3072 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3073 {
3074         mutex_lock(&vcpu->kvm->lock);
3075         preempt_disable();
3076         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3077         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3078         preempt_enable();
3079         mutex_unlock(&vcpu->kvm->lock);
3080         if (!kvm_is_ucontrol(vcpu->kvm)) {
3081                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3082                 sca_add_vcpu(vcpu);
3083         }
3084         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3085                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3086         /* make vcpu_load load the right gmap on the first trigger */
3087         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3088 }
3089
3090 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3091 {
3092         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3093             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3094                 return true;
3095         return false;
3096 }
3097
3098 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3099 {
3100         /* At least one ECC subfunction must be present */
3101         return kvm_has_pckmo_subfunc(kvm, 32) ||
3102                kvm_has_pckmo_subfunc(kvm, 33) ||
3103                kvm_has_pckmo_subfunc(kvm, 34) ||
3104                kvm_has_pckmo_subfunc(kvm, 40) ||
3105                kvm_has_pckmo_subfunc(kvm, 41);
3107 }
3108
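/*
 * Propagate the VM-wide crypto configuration into the vcpu's SIE block:
 * CRYCB origin, AES/DEA protected-key wrapping (including ECC when the
 * required PCKMO subfunctions exist) and AP instruction interpretation.
 */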
3109 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3110 {
3111         /*
3112          * If the AP instructions are not being interpreted and the MSAX3
3113          * facility is not configured for the guest, there is nothing to set up.
3114          */
3115         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3116                 return;
3117
3118         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3119         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3120         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3121         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3122
3123         if (vcpu->kvm->arch.crypto.apie)
3124                 vcpu->arch.sie_block->eca |= ECA_APIE;
3125
3126         /* Set up protected key support */
3127         if (vcpu->kvm->arch.crypto.aes_kw) {
3128                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3129                 /* ecc is also wrapped with AES key */
3130                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3131                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3132         }
3133
3134         if (vcpu->kvm->arch.crypto.dea_kw)
3135                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3136 }
3137
3138 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3139 {
3140         free_page(vcpu->arch.sie_block->cbrlo);
3141         vcpu->arch.sie_block->cbrlo = 0;
3142 }
3143
3144 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3145 {
3146         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3147         if (!vcpu->arch.sie_block->cbrlo)
3148                 return -ENOMEM;
3149         return 0;
3150 }
3151
3152 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3153 {
3154         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3155
3156         vcpu->arch.sie_block->ibc = model->ibc;
3157         if (test_kvm_facility(vcpu->kvm, 7))
3158                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3159 }
3160
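/*
 * One-time setup of the SIE control block: initial CPU state, execution
 * control bits based on the available facilities, CMMA buffer, clock
 * comparator timer and crypto settings. For protected VMs the ultravisor
 * vcpu is created as the last step.
 */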
3161 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3162 {
3163         int rc = 0;
3164         u16 uvrc, uvrrc;
3165
3166         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3167                                                     CPUSTAT_SM |
3168                                                     CPUSTAT_STOPPED);
3169
3170         if (test_kvm_facility(vcpu->kvm, 78))
3171                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3172         else if (test_kvm_facility(vcpu->kvm, 8))
3173                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3174
3175         kvm_s390_vcpu_setup_model(vcpu);
3176
3177         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3178         if (MACHINE_HAS_ESOP)
3179                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3180         if (test_kvm_facility(vcpu->kvm, 9))
3181                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3182         if (test_kvm_facility(vcpu->kvm, 73))
3183                 vcpu->arch.sie_block->ecb |= ECB_TE;
3184         if (!kvm_is_ucontrol(vcpu->kvm))
3185                 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3186
3187         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3188                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3189         if (test_kvm_facility(vcpu->kvm, 130))
3190                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3191         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3192         if (sclp.has_cei)
3193                 vcpu->arch.sie_block->eca |= ECA_CEI;
3194         if (sclp.has_ib)
3195                 vcpu->arch.sie_block->eca |= ECA_IB;
3196         if (sclp.has_siif)
3197                 vcpu->arch.sie_block->eca |= ECA_SII;
3198         if (sclp.has_sigpif)
3199                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3200         if (test_kvm_facility(vcpu->kvm, 129)) {
3201                 vcpu->arch.sie_block->eca |= ECA_VX;
3202                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3203         }
3204         if (test_kvm_facility(vcpu->kvm, 139))
3205                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3206         if (test_kvm_facility(vcpu->kvm, 156))
3207                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3208         if (vcpu->arch.sie_block->gd) {
3209                 vcpu->arch.sie_block->eca |= ECA_AIV;
3210                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3211                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3212         }
3213         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3214                                         | SDNXC;
3215         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3216
3217         if (sclp.has_kss)
3218                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3219         else
3220                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3221
3222         if (vcpu->kvm->arch.use_cmma) {
3223                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3224                 if (rc)
3225                         return rc;
3226         }
3227         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3228         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3229
3230         vcpu->arch.sie_block->hpid = HPID_KVM;
3231
3232         kvm_s390_vcpu_crypto_setup(vcpu);
3233
3234         mutex_lock(&vcpu->kvm->lock);
3235         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3236                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3237                 if (rc)
3238                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3239         }
3240         mutex_unlock(&vcpu->kvm->lock);
3241
3242         return rc;
3243 }
3244
3245 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3246 {
3247         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3248                 return -EINVAL;
3249         return 0;
3250 }
3251
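/*
 * Allocate and initialize the vcpu's SIE page, select which register sets
 * are synchronized via the kvm_run area (depending on the available
 * facilities) and perform the ucontrol respectively regular setup.
 */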
3252 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3253 {
3254         struct sie_page *sie_page;
3255         int rc;
3256
3257         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3258         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3259         if (!sie_page)
3260                 return -ENOMEM;
3261
3262         vcpu->arch.sie_block = &sie_page->sie_block;
3263         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3264
3265         /* the real guest size will always be smaller than msl */
3266         vcpu->arch.sie_block->mso = 0;
3267         vcpu->arch.sie_block->msl = sclp.hamax;
3268
3269         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3270         spin_lock_init(&vcpu->arch.local_int.lock);
3271         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3272         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3273                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3274         seqcount_init(&vcpu->arch.cputm_seqcount);
3275
3276         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3277         kvm_clear_async_pf_completion_queue(vcpu);
3278         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3279                                     KVM_SYNC_GPRS |
3280                                     KVM_SYNC_ACRS |
3281                                     KVM_SYNC_CRS |
3282                                     KVM_SYNC_ARCH0 |
3283                                     KVM_SYNC_PFAULT |
3284                                     KVM_SYNC_DIAG318;
3285         kvm_s390_set_prefix(vcpu, 0);
3286         if (test_kvm_facility(vcpu->kvm, 64))
3287                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3288         if (test_kvm_facility(vcpu->kvm, 82))
3289                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3290         if (test_kvm_facility(vcpu->kvm, 133))
3291                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3292         if (test_kvm_facility(vcpu->kvm, 156))
3293                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3294         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3295          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3296          */
3297         if (MACHINE_HAS_VX)
3298                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3299         else
3300                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3301
3302         if (kvm_is_ucontrol(vcpu->kvm)) {
3303                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3304                 if (rc)
3305                         goto out_free_sie_block;
3306         }
3307
3308         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3309                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3310         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3311
3312         rc = kvm_s390_vcpu_setup(vcpu);
3313         if (rc)
3314                 goto out_ucontrol_uninit;
3315         return 0;
3316
3317 out_ucontrol_uninit:
3318         if (kvm_is_ucontrol(vcpu->kvm))
3319                 gmap_remove(vcpu->arch.gmap);
3320 out_free_sie_block:
3321         free_page((unsigned long)(vcpu->arch.sie_block));
3322         return rc;
3323 }
3324
3325 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3326 {
3327         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3328         return kvm_s390_vcpu_has_irq(vcpu, 0);
3329 }
3330
3331 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3332 {
3333         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3334 }
3335
3336 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3337 {
3338         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3339         exit_sie(vcpu);
3340 }
3341
3342 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3343 {
3344         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3345 }
3346
3347 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3348 {
3349         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3350         exit_sie(vcpu);
3351 }
3352
3353 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3354 {
3355         return atomic_read(&vcpu->arch.sie_block->prog20) &
3356                (PROG_BLOCK_SIE | PROG_REQUEST);
3357 }
3358
3359 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3360 {
3361         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3362 }
3363
3364 /*
3365  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3366  * If the CPU is not running (e.g. waiting as idle) the function will
3367  * return immediately. */
3368 void exit_sie(struct kvm_vcpu *vcpu)
3369 {
3370         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3371         kvm_s390_vsie_kick(vcpu);
3372         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3373                 cpu_relax();
3374 }
3375
3376 /* Kick a guest cpu out of SIE to process a request synchronously */
3377 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3378 {
3379         kvm_make_request(req, vcpu);
3380         kvm_s390_vcpu_request(vcpu);
3381 }
3382
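/*
 * gmap invalidation notifier: if the invalidated range overlaps one of a
 * vcpu's two prefix pages, request a MMU reload so that the ipte notifier
 * for the prefix is re-armed before the vcpu re-enters SIE.
 */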
3383 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3384                               unsigned long end)
3385 {
3386         struct kvm *kvm = gmap->private;
3387         struct kvm_vcpu *vcpu;
3388         unsigned long prefix;
3389         unsigned long i;
3390
3391         if (gmap_is_shadow(gmap))
3392                 return;
3393         if (start >= 1UL << 31)
3394                 /* We are only interested in prefix pages */
3395                 return;
3396         kvm_for_each_vcpu(i, vcpu, kvm) {
3397                 /* match against both prefix pages */
3398                 prefix = kvm_s390_get_prefix(vcpu);
3399                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3400                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3401                                    start, end);
3402                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3403                 }
3404         }
3405 }
3406
3407 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3408 {
3409         /* do not poll with more than halt_poll_max_steal percent of steal time */
3410         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3411             READ_ONCE(halt_poll_max_steal)) {
3412                 vcpu->stat.halt_no_poll_steal++;
3413                 return true;
3414         }
3415         return false;
3416 }
3417
3418 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3419 {
3420         /* kvm common code refers to this, but never calls it */
3421         BUG();
3422         return 0;
3423 }
3424
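/*
 * ONE_REG accessors for s390 registers that are not covered by the regular
 * register ioctls. Illustrative userspace sketch (not part of this file;
 * vcpu_fd is assumed to be an open vcpu file descriptor):
 *
 *	__u64 val;
 *	struct kvm_one_reg reg = {
 *		.id   = KVM_REG_S390_CPU_TIMER,
 *		.addr = (__u64)&val,
 *	};
 *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 */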
3425 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3426                                            struct kvm_one_reg *reg)
3427 {
3428         int r = -EINVAL;
3429
3430         switch (reg->id) {
3431         case KVM_REG_S390_TODPR:
3432                 r = put_user(vcpu->arch.sie_block->todpr,
3433                              (u32 __user *)reg->addr);
3434                 break;
3435         case KVM_REG_S390_EPOCHDIFF:
3436                 r = put_user(vcpu->arch.sie_block->epoch,
3437                              (u64 __user *)reg->addr);
3438                 break;
3439         case KVM_REG_S390_CPU_TIMER:
3440                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3441                              (u64 __user *)reg->addr);
3442                 break;
3443         case KVM_REG_S390_CLOCK_COMP:
3444                 r = put_user(vcpu->arch.sie_block->ckc,
3445                              (u64 __user *)reg->addr);
3446                 break;
3447         case KVM_REG_S390_PFTOKEN:
3448                 r = put_user(vcpu->arch.pfault_token,
3449                              (u64 __user *)reg->addr);
3450                 break;
3451         case KVM_REG_S390_PFCOMPARE:
3452                 r = put_user(vcpu->arch.pfault_compare,
3453                              (u64 __user *)reg->addr);
3454                 break;
3455         case KVM_REG_S390_PFSELECT:
3456                 r = put_user(vcpu->arch.pfault_select,
3457                              (u64 __user *)reg->addr);
3458                 break;
3459         case KVM_REG_S390_PP:
3460                 r = put_user(vcpu->arch.sie_block->pp,
3461                              (u64 __user *)reg->addr);
3462                 break;
3463         case KVM_REG_S390_GBEA:
3464                 r = put_user(vcpu->arch.sie_block->gbea,
3465                              (u64 __user *)reg->addr);
3466                 break;
3467         default:
3468                 break;
3469         }
3470
3471         return r;
3472 }
3473
3474 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3475                                            struct kvm_one_reg *reg)
3476 {
3477         int r = -EINVAL;
3478         __u64 val;
3479
3480         switch (reg->id) {
3481         case KVM_REG_S390_TODPR:
3482                 r = get_user(vcpu->arch.sie_block->todpr,
3483                              (u32 __user *)reg->addr);
3484                 break;
3485         case KVM_REG_S390_EPOCHDIFF:
3486                 r = get_user(vcpu->arch.sie_block->epoch,
3487                              (u64 __user *)reg->addr);
3488                 break;
3489         case KVM_REG_S390_CPU_TIMER:
3490                 r = get_user(val, (u64 __user *)reg->addr);
3491                 if (!r)
3492                         kvm_s390_set_cpu_timer(vcpu, val);
3493                 break;
3494         case KVM_REG_S390_CLOCK_COMP:
3495                 r = get_user(vcpu->arch.sie_block->ckc,
3496                              (u64 __user *)reg->addr);
3497                 break;
3498         case KVM_REG_S390_PFTOKEN:
3499                 r = get_user(vcpu->arch.pfault_token,
3500                              (u64 __user *)reg->addr);
3501                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3502                         kvm_clear_async_pf_completion_queue(vcpu);
3503                 break;
3504         case KVM_REG_S390_PFCOMPARE:
3505                 r = get_user(vcpu->arch.pfault_compare,
3506                              (u64 __user *)reg->addr);
3507                 break;
3508         case KVM_REG_S390_PFSELECT:
3509                 r = get_user(vcpu->arch.pfault_select,
3510                              (u64 __user *)reg->addr);
3511                 break;
3512         case KVM_REG_S390_PP:
3513                 r = get_user(vcpu->arch.sie_block->pp,
3514                              (u64 __user *)reg->addr);
3515                 break;
3516         case KVM_REG_S390_GBEA:
3517                 r = get_user(vcpu->arch.sie_block->gbea,
3518                              (u64 __user *)reg->addr);
3519                 break;
3520         default:
3521                 break;
3522         }
3523
3524         return r;
3525 }
3526
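/*
 * The three architected vcpu resets build on each other: the normal reset
 * below is contained in the initial reset, which in turn is contained in
 * the clear reset.
 */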
3527 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3528 {
3529         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3530         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3531         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3532
3533         kvm_clear_async_pf_completion_queue(vcpu);
3534         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3535                 kvm_s390_vcpu_stop(vcpu);
3536         kvm_s390_clear_local_irqs(vcpu);
3537 }
3538
3539 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3540 {
3541         /* Initial reset is a superset of the normal reset */
3542         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3543
3544         /*
3545          * This equals the initial cpu reset in POP, but we don't switch to ESA.
3546          * We not only reset the internal data, but also ...
3547          */
3548         vcpu->arch.sie_block->gpsw.mask = 0;
3549         vcpu->arch.sie_block->gpsw.addr = 0;
3550         kvm_s390_set_prefix(vcpu, 0);
3551         kvm_s390_set_cpu_timer(vcpu, 0);
3552         vcpu->arch.sie_block->ckc = 0;
3553         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3554         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3555         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3556
3557         /* ... the data in sync regs */
3558         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3559         vcpu->run->s.regs.ckc = 0;
3560         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3561         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3562         vcpu->run->psw_addr = 0;
3563         vcpu->run->psw_mask = 0;
3564         vcpu->run->s.regs.todpr = 0;
3565         vcpu->run->s.regs.cputm = 0;
3566         vcpu->run->s.regs.ckc = 0;
3567         vcpu->run->s.regs.pp = 0;
3568         vcpu->run->s.regs.gbea = 1;
3569         vcpu->run->s.regs.fpc = 0;
3570         /*
3571          * Do not reset these registers in the protected case, as some of
3572          * them are overlaid and they are not accessible in this case
3573          * anyway.
3574          */
3575         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3576                 vcpu->arch.sie_block->gbea = 1;
3577                 vcpu->arch.sie_block->pp = 0;
3578                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3579                 vcpu->arch.sie_block->todpr = 0;
3580         }
3581 }
3582
3583 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3584 {
3585         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3586
3587         /* Clear reset is a superset of the initial reset */
3588         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3589
3590         memset(&regs->gprs, 0, sizeof(regs->gprs));
3591         memset(&regs->vrs, 0, sizeof(regs->vrs));
3592         memset(&regs->acrs, 0, sizeof(regs->acrs));
3593         memset(&regs->gscb, 0, sizeof(regs->gscb));
3594
3595         regs->etoken = 0;
3596         regs->etoken_extension = 0;
3597 }
3598
3599 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3600 {
3601         vcpu_load(vcpu);
3602         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3603         vcpu_put(vcpu);
3604         return 0;
3605 }
3606
3607 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3608 {
3609         vcpu_load(vcpu);
3610         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3611         vcpu_put(vcpu);
3612         return 0;
3613 }
3614
3615 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3616                                   struct kvm_sregs *sregs)
3617 {
3618         vcpu_load(vcpu);
3619
3620         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3621         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3622
3623         vcpu_put(vcpu);
3624         return 0;
3625 }
3626
3627 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3628                                   struct kvm_sregs *sregs)
3629 {
3630         vcpu_load(vcpu);
3631
3632         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3633         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3634
3635         vcpu_put(vcpu);
3636         return 0;
3637 }
3638
3639 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3640 {
3641         int ret = 0;
3642
3643         vcpu_load(vcpu);
3644
3645         if (test_fp_ctl(fpu->fpc)) {
3646                 ret = -EINVAL;
3647                 goto out;
3648         }
3649         vcpu->run->s.regs.fpc = fpu->fpc;
3650         if (MACHINE_HAS_VX)
3651                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3652                                  (freg_t *) fpu->fprs);
3653         else
3654                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3655
3656 out:
3657         vcpu_put(vcpu);
3658         return ret;
3659 }
3660
3661 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3662 {
3663         vcpu_load(vcpu);
3664
3665         /* make sure we have the latest values */
3666         save_fpu_regs();
3667         if (MACHINE_HAS_VX)
3668                 convert_vx_to_fp((freg_t *) fpu->fprs,
3669                                  (__vector128 *) vcpu->run->s.regs.vrs);
3670         else
3671                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3672         fpu->fpc = vcpu->run->s.regs.fpc;
3673
3674         vcpu_put(vcpu);
3675         return 0;
3676 }
3677
3678 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3679 {
3680         int rc = 0;
3681
3682         if (!is_vcpu_stopped(vcpu))
3683                 rc = -EBUSY;
3684         else {
3685                 vcpu->run->psw_mask = psw.mask;
3686                 vcpu->run->psw_addr = psw.addr;
3687         }
3688         return rc;
3689 }
3690
3691 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3692                                   struct kvm_translation *tr)
3693 {
3694         return -EINVAL; /* not implemented yet */
3695 }
3696
3697 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3698                               KVM_GUESTDBG_USE_HW_BP | \
3699                               KVM_GUESTDBG_ENABLE)
3700
3701 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3702                                         struct kvm_guest_debug *dbg)
3703 {
3704         int rc = 0;
3705
3706         vcpu_load(vcpu);
3707
3708         vcpu->guest_debug = 0;
3709         kvm_s390_clear_bp_data(vcpu);
3710
3711         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3712                 rc = -EINVAL;
3713                 goto out;
3714         }
3715         if (!sclp.has_gpere) {
3716                 rc = -EINVAL;
3717                 goto out;
3718         }
3719
3720         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3721                 vcpu->guest_debug = dbg->control;
3722                 /* enforce guest PER */
3723                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3724
3725                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3726                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3727         } else {
3728                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3729                 vcpu->arch.guestdbg.last_bp = 0;
3730         }
3731
3732         if (rc) {
3733                 vcpu->guest_debug = 0;
3734                 kvm_s390_clear_bp_data(vcpu);
3735                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3736         }
3737
3738 out:
3739         vcpu_put(vcpu);
3740         return rc;
3741 }
3742
3743 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3744                                     struct kvm_mp_state *mp_state)
3745 {
3746         int ret;
3747
3748         vcpu_load(vcpu);
3749
3750         /* CHECK_STOP and LOAD are not supported yet */
3751         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3752                                       KVM_MP_STATE_OPERATING;
3753
3754         vcpu_put(vcpu);
3755         return ret;
3756 }
3757
3758 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3759                                     struct kvm_mp_state *mp_state)
3760 {
3761         int rc = 0;
3762
3763         vcpu_load(vcpu);
3764
3765         /* user space knows about this interface - let it control the state */
3766         kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3767
3768         switch (mp_state->mp_state) {
3769         case KVM_MP_STATE_STOPPED:
3770                 rc = kvm_s390_vcpu_stop(vcpu);
3771                 break;
3772         case KVM_MP_STATE_OPERATING:
3773                 rc = kvm_s390_vcpu_start(vcpu);
3774                 break;
3775         case KVM_MP_STATE_LOAD:
3776                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3777                         rc = -ENXIO;
3778                         break;
3779                 }
3780                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3781                 break;
3782         case KVM_MP_STATE_CHECK_STOP:
3783                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3784         default:
3785                 rc = -ENXIO;
3786         }
3787
3788         vcpu_put(vcpu);
3789         return rc;
3790 }
3791
3792 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3793 {
3794         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3795 }
3796
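/*
 * Process requests that were posted against this vcpu before it re-enters
 * SIE: re-protect the prefix pages, flush the TLB, toggle IBS, and switch
 * CMM interpretation on or off around live migration.
 */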
3797 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3798 {
3799 retry:
3800         kvm_s390_vcpu_request_handled(vcpu);
3801         if (!kvm_request_pending(vcpu))
3802                 return 0;
3803         /*
3804          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3805          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3806          * This ensures that the ipte instruction for this request has
3807          * already finished. We might race against a second unmapper that
3808          * wants to set the blocking bit. Let's just retry the request loop.
3809          */
3810         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3811                 int rc;
3812                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3813                                           kvm_s390_get_prefix(vcpu),
3814                                           PAGE_SIZE * 2, PROT_WRITE);
3815                 if (rc) {
3816                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3817                         return rc;
3818                 }
3819                 goto retry;
3820         }
3821
3822         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3823                 vcpu->arch.sie_block->ihcpu = 0xffff;
3824                 goto retry;
3825         }
3826
3827         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3828                 if (!ibs_enabled(vcpu)) {
3829                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3830                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3831                 }
3832                 goto retry;
3833         }
3834
3835         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3836                 if (ibs_enabled(vcpu)) {
3837                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3838                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3839                 }
3840                 goto retry;
3841         }
3842
3843         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3844                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3845                 goto retry;
3846         }
3847
3848         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3849                 /*
3850                  * Disable CMM virtualization; we will emulate the ESSA
3851                  * instruction manually, in order to provide additional
3852                  * functionalities needed for live migration.
3853                  */
3854                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3855                 goto retry;
3856         }
3857
3858         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3859                 /*
3860                  * Re-enable CMM virtualization if CMMA is available and
3861                  * CMM has been used.
3862                  */
3863                 if ((vcpu->kvm->arch.use_cmma) &&
3864                     (vcpu->kvm->mm->context.uses_cmm))
3865                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3866                 goto retry;
3867         }
3868
3869         /* nothing to do, just clear the request */
3870         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3871         /* we left the vsie handler, nothing to do, just clear the request */
3872         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3873
3874         return 0;
3875 }
3876
3877 void kvm_s390_set_tod_clock(struct kvm *kvm,
3878                             const struct kvm_s390_vm_tod_clock *gtod)
3879 {
3880         struct kvm_vcpu *vcpu;
3881         union tod_clock clk;
3882         unsigned long i;
3883
3884         mutex_lock(&kvm->lock);
3885         preempt_disable();
3886
3887         store_tod_clock_ext(&clk);
3888
3889         kvm->arch.epoch = gtod->tod - clk.tod;
3890         kvm->arch.epdx = 0;
3891         if (test_kvm_facility(kvm, 139)) {
3892                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3893                 if (kvm->arch.epoch > gtod->tod)
3894                         kvm->arch.epdx -= 1;
3895         }
3896
3897         kvm_s390_vcpu_block_all(kvm);
3898         kvm_for_each_vcpu(i, vcpu, kvm) {
3899                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3900                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3901         }
3902
3903         kvm_s390_vcpu_unblock_all(kvm);
3904         preempt_enable();
3905         mutex_unlock(&kvm->lock);
3906 }
3907
3908 /**
3909  * kvm_arch_fault_in_page - fault-in guest page if necessary
3910  * @vcpu: The corresponding virtual cpu
3911  * @gpa: Guest physical address
3912  * @writable: Whether the page should be writable or not
3913  *
3914  * Make sure that a guest page has been faulted-in on the host.
3915  *
3916  * Return: Zero on success, negative error code otherwise.
3917  */
3918 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3919 {
3920         return gmap_fault(vcpu->arch.gmap, gpa,
3921                           writable ? FAULT_FLAG_WRITE : 0);
3922 }
3923
3924 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3925                                       unsigned long token)
3926 {
3927         struct kvm_s390_interrupt inti;
3928         struct kvm_s390_irq irq;
3929
3930         if (start_token) {
3931                 irq.u.ext.ext_params2 = token;
3932                 irq.type = KVM_S390_INT_PFAULT_INIT;
3933                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3934         } else {
3935                 inti.type = KVM_S390_INT_PFAULT_DONE;
3936                 inti.parm64 = token;
3937                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3938         }
3939 }
3940
3941 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3942                                      struct kvm_async_pf *work)
3943 {
3944         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3945         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3946
3947         return true;
3948 }
3949
3950 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3951                                  struct kvm_async_pf *work)
3952 {
3953         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3954         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3955 }
3956
3957 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3958                                struct kvm_async_pf *work)
3959 {
3960         /* s390 will always inject the page directly */
3961 }
3962
3963 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
3964 {
3965         /*
3966          * s390 will always inject the page directly,
3967          * but we still want kvm_check_async_pf_completion() to clean up
3968          */
3969         return true;
3970 }
3971
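/*
 * Decide whether the current host fault may be handled asynchronously:
 * this requires a valid pfault token, matching PSW mask bits, enabled
 * external interrupts and service signals, and pfault being enabled for
 * the gmap. Returns true if an async pfault was queued.
 */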
3972 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3973 {
3974         hva_t hva;
3975         struct kvm_arch_async_pf arch;
3976
3977         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3978                 return false;
3979         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3980             vcpu->arch.pfault_compare)
3981                 return false;
3982         if (psw_extint_disabled(vcpu))
3983                 return false;
3984         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3985                 return false;
3986         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3987                 return false;
3988         if (!vcpu->arch.gmap->pfault_enabled)
3989                 return false;
3990
3991         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3992         hva += current->thread.gmap_addr & ~PAGE_MASK;
3993         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3994                 return false;
3995
3996         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3997 }
3998
3999 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4000 {
4001         int rc, cpuflags;
4002
4003         /*
4004          * On s390 notifications for arriving pages will be delivered directly
4005          * to the guest but the housekeeping for completed pfaults is
4006          * handled outside the worker.
4007          */
4008         kvm_check_async_pf_completion(vcpu);
4009
4010         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4011         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4012
4013         if (need_resched())
4014                 schedule();
4015
4016         if (!kvm_is_ucontrol(vcpu->kvm)) {
4017                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4018                 if (rc)
4019                         return rc;
4020         }
4021
4022         rc = kvm_s390_handle_requests(vcpu);
4023         if (rc)
4024                 return rc;
4025
4026         if (guestdbg_enabled(vcpu)) {
4027                 kvm_s390_backup_guest_per_regs(vcpu);
4028                 kvm_s390_patch_guest_per_regs(vcpu);
4029         }
4030
4031         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4032
4033         vcpu->arch.sie_block->icptcode = 0;
4034         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4035         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4036         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4037
4038         return 0;
4039 }
4040
4041 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4042 {
4043         struct kvm_s390_pgm_info pgm_info = {
4044                 .code = PGM_ADDRESSING,
4045         };
4046         u8 opcode, ilen;
4047         int rc;
4048
4049         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4050         trace_kvm_s390_sie_fault(vcpu);
4051
4052         /*
4053          * We want to inject an addressing exception, which is defined as a
4054          * suppressing or terminating exception. However, since we came here
4055          * by a DAT access exception, the PSW still points to the faulting
4056          * instruction since DAT exceptions are nullifying. So we've got
4057          * to look up the current opcode to get the length of the instruction
4058          * to be able to forward the PSW.
4059          */
4060         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4061         ilen = insn_length(opcode);
4062         if (rc < 0) {
4063                 return rc;
4064         } else if (rc) {
4065                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4066                  * Forward by arbitrary ilc, injection will take care of
4067                  * nullification if necessary.
4068                  */
4069                 pgm_info = vcpu->arch.pgm;
4070                 ilen = 4;
4071         }
4072         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4073         kvm_s390_forward_psw(vcpu, ilen);
4074         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4075 }
4076
4077 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4078 {
4079         struct mcck_volatile_info *mcck_info;
4080         struct sie_page *sie_page;
4081
4082         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4083                    vcpu->arch.sie_block->icptcode);
4084         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4085
4086         if (guestdbg_enabled(vcpu))
4087                 kvm_s390_restore_guest_per_regs(vcpu);
4088
4089         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4090         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4091
4092         if (exit_reason == -EINTR) {
4093                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4094                 sie_page = container_of(vcpu->arch.sie_block,
4095                                         struct sie_page, sie_block);
4096                 mcck_info = &sie_page->mcck_info;
4097                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4098                 return 0;
4099         }
4100
4101         if (vcpu->arch.sie_block->icptcode > 0) {
4102                 int rc = kvm_handle_sie_intercept(vcpu);
4103
4104                 if (rc != -EOPNOTSUPP)
4105                         return rc;
4106                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4107                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4108                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4109                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4110                 return -EREMOTE;
4111         } else if (exit_reason != -EFAULT) {
4112                 vcpu->stat.exit_null++;
4113                 return 0;
4114         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4115                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4116                 vcpu->run->s390_ucontrol.trans_exc_code =
4117                                                 current->thread.gmap_addr;
4118                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4119                 return -EREMOTE;
4120         } else if (current->thread.gmap_pfault) {
4121                 trace_kvm_s390_major_guest_pfault(vcpu);
4122                 current->thread.gmap_pfault = 0;
4123                 if (kvm_arch_setup_async_pf(vcpu))
4124                         return 0;
4125                 vcpu->stat.pfault_sync++;
4126                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4127         }
4128         return vcpu_post_run_fault_in_sie(vcpu);
4129 }
4130
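/* external, I/O and machine check interrupt mask bits of the guest PSW */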
4131 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
4132 static int __vcpu_run(struct kvm_vcpu *vcpu)
4133 {
4134         int rc, exit_reason;
4135         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4136
4137         /*
4138          * We try to hold kvm->srcu during most of vcpu_run (except when run-
4139          * ning the guest), so that memslots (and other stuff) are protected
4140          */
4141         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4142
4143         do {
4144                 rc = vcpu_pre_run(vcpu);
4145                 if (rc)
4146                         break;
4147
4148                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4149                 /*
4150                  * As PF_VCPU will be used in the fault handler, there must be no
4151                  * uaccess between guest_enter and guest_exit.
4152                  */
4153                 local_irq_disable();
4154                 guest_enter_irqoff();
4155                 __disable_cpu_timer_accounting(vcpu);
4156                 local_irq_enable();
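                /*
                 * Protected guests exchange their GPRs with the ultravisor
                 * through the pv_grregs buffer in the SIE page.
                 */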
4157                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4158                         memcpy(sie_page->pv_grregs,
4159                                vcpu->run->s.regs.gprs,
4160                                sizeof(sie_page->pv_grregs));
4161                 }
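                /* make sure the FP/vector registers of current are loaded */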
4162                 if (test_cpu_flag(CIF_FPU))
4163                         load_fpu_regs();
4164                 exit_reason = sie64a(vcpu->arch.sie_block,
4165                                      vcpu->run->s.regs.gprs);
4166                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4167                         memcpy(vcpu->run->s.regs.gprs,
4168                                sie_page->pv_grregs,
4169                                sizeof(sie_page->pv_grregs));
4170                         /*
4171                          * We're not allowed to inject interrupts on intercepts
4172                          * that leave the guest state in an "in-between" state
4173                          * where the next SIE entry will do a continuation.
4174                          * Fence interrupts in our "internal" PSW.
4175                          */
4176                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4177                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4178                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4179                         }
4180                 }
4181                 local_irq_disable();
4182                 __enable_cpu_timer_accounting(vcpu);
4183                 guest_exit_irqoff();
4184                 local_irq_enable();
4185                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4186
4187                 rc = vcpu_post_run(vcpu, exit_reason);
4188         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4189
4190         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4191         return rc;
4192 }
4193
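/* sync additional state that is only accessible for non-protected (fmt2) guests */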
4194 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4195 {
4196         struct kvm_run *kvm_run = vcpu->run;
4197         struct runtime_instr_cb *riccb;
4198         struct gs_cb *gscb;
4199
4200         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4201         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4202         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4203         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4204         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4205                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4206                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4207                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4208         }
4209         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4210                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4211                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4212                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4213                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4214                         kvm_clear_async_pf_completion_queue(vcpu);
4215         }
4216         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4217                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4218                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4219                 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4220         }
4221         /*
4222          * If userspace sets the riccb (e.g. after migration) to a valid state,
4223          * we should enable RI here instead of doing the lazy enablement.
4224          */
4225         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4226             test_kvm_facility(vcpu->kvm, 64) &&
4227             riccb->v &&
4228             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4229                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4230                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4231         }
4232         /*
4233          * If userspace sets the gscb (e.g. after migration) to non-zero,
4234          * we should enable GS here instead of doing the lazy enablement.
4235          */
4236         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4237             test_kvm_facility(vcpu->kvm, 133) &&
4238             gscb->gssm &&
4239             !vcpu->arch.gs_enabled) {
4240                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4241                 vcpu->arch.sie_block->ecb |= ECB_GS;
4242                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4243                 vcpu->arch.gs_enabled = 1;
4244         }
4245         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4246             test_kvm_facility(vcpu->kvm, 82)) {
4247                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4248                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4249         }
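        /* switch the guarded storage control block from the host to the guest */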
4250         if (MACHINE_HAS_GS) {
4251                 preempt_disable();
4252                 __ctl_set_bit(2, 4);
4253                 if (current->thread.gs_cb) {
4254                         vcpu->arch.host_gscb = current->thread.gs_cb;
4255                         save_gs_cb(vcpu->arch.host_gscb);
4256                 }
4257                 if (vcpu->arch.gs_enabled) {
4258                         current->thread.gs_cb = (struct gs_cb *)
4259                                                 &vcpu->run->s.regs.gscb;
4260                         restore_gs_cb(current->thread.gs_cb);
4261                 }
4262                 preempt_enable();
4263         }
4264         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4265 }
4266
4267 static void sync_regs(struct kvm_vcpu *vcpu)
4268 {
4269         struct kvm_run *kvm_run = vcpu->run;
4270
4271         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4272                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4273         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4274                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4275                 /* some control register changes require a tlb flush */
4276                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4277         }
4278         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4279                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4280                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4281         }
4282         save_access_regs(vcpu->arch.host_acrs);
4283         restore_access_regs(vcpu->run->s.regs.acrs);
4284         /* save host (userspace) fprs/vrs */
4285         save_fpu_regs();
4286         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4287         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4288         if (MACHINE_HAS_VX)
4289                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4290         else
4291                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4292         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4293         if (test_fp_ctl(current->thread.fpu.fpc))
4294                 /* User space provided an invalid FPC, let's clear it */
4295                 current->thread.fpu.fpc = 0;
4296
4297         /* Sync fmt2 only data */
4298         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4299                 sync_regs_fmt2(vcpu);
4300         } else {
4301                 /*
4302                  * In several places we have to modify our internal view to
4303                  * not do things that are disallowed by the ultravisor. For
4304                  * example we must not inject interrupts after specific exits
4305                  * (e.g. 112 prefix page not secure). We do this by turning
4306                  * off the machine check, external and I/O interrupt bits
4307                  * of our PSW copy. To avoid getting validity intercepts, we
4308                  * do only accept the condition code from userspace.
4309                  * only accept the condition code from userspace.
4310                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4311                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4312                                                    PSW_MASK_CC;
4313         }
4314
4315         kvm_run->kvm_dirty_regs = 0;
4316 }
4317
4318 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4319 {
4320         struct kvm_run *kvm_run = vcpu->run;
4321
4322         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4323         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4324         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4325         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4326         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
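        /* switch the guarded storage control block back from the guest to the host */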
4327         if (MACHINE_HAS_GS) {
4328                 preempt_disable();
4329                 __ctl_set_bit(2, 4);
4330                 if (vcpu->arch.gs_enabled)
4331                         save_gs_cb(current->thread.gs_cb);
4332                 current->thread.gs_cb = vcpu->arch.host_gscb;
4333                 restore_gs_cb(vcpu->arch.host_gscb);
4334                 if (!vcpu->arch.host_gscb)
4335                         __ctl_clear_bit(2, 4);
4336                 vcpu->arch.host_gscb = NULL;
4337                 preempt_enable();
4338         }
4339         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4340 }
4341
4342 static void store_regs(struct kvm_vcpu *vcpu)
4343 {
4344         struct kvm_run *kvm_run = vcpu->run;
4345
4346         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4347         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4348         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4349         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4350         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4351         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4352         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4353         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4354         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4355         save_access_regs(vcpu->run->s.regs.acrs);
4356         restore_access_regs(vcpu->arch.host_acrs);
4357         /* Save guest register state */
4358         save_fpu_regs();
4359         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4360         /* Restore will be done lazily at return */
4361         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4362         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4363         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4364                 store_regs_fmt2(vcpu);
4365 }
4366
4367 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4368 {
4369         struct kvm_run *kvm_run = vcpu->run;
4370         int rc;
4371
4372         if (kvm_run->immediate_exit)
4373                 return -EINTR;
4374
4375         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4376             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4377                 return -EINVAL;
4378
4379         vcpu_load(vcpu);
4380
4381         if (guestdbg_exit_pending(vcpu)) {
4382                 kvm_s390_prepare_debug_exit(vcpu);
4383                 rc = 0;
4384                 goto out;
4385         }
4386
4387         kvm_sigset_activate(vcpu);
4388
4389         /*
4390          * No need to check the return value of vcpu_start: it can only fail for
4391          * protvirt, but protvirt means the user cpu state control is set.
4392          */
4393         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4394                 kvm_s390_vcpu_start(vcpu);
4395         } else if (is_vcpu_stopped(vcpu)) {
4396                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4397                                    vcpu->vcpu_id);
4398                 rc = -EINVAL;
4399                 goto out;
4400         }
4401
4402         sync_regs(vcpu);
4403         enable_cpu_timer_accounting(vcpu);
4404
4405         might_fault();
4406         rc = __vcpu_run(vcpu);
4407
4408         if (signal_pending(current) && !rc) {
4409                 kvm_run->exit_reason = KVM_EXIT_INTR;
4410                 rc = -EINTR;
4411         }
4412
4413         if (guestdbg_exit_pending(vcpu) && !rc)  {
4414                 kvm_s390_prepare_debug_exit(vcpu);
4415                 rc = 0;
4416         }
4417
4418         if (rc == -EREMOTE) {
4419                 /* userspace support is needed, kvm_run has been prepared */
4420                 rc = 0;
4421         }
4422
4423         disable_cpu_timer_accounting(vcpu);
4424         store_regs(vcpu);
4425
4426         kvm_sigset_deactivate(vcpu);
4427
4428         vcpu->stat.exit_userspace++;
4429 out:
4430         vcpu_put(vcpu);
4431         return rc;
4432 }
4433
4434 /*
4435  * store status at address
4436  * we have two special cases:
4437  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4438  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4439  */
4440 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4441 {
4442         unsigned char archmode = 1;
4443         freg_t fprs[NUM_FPRS];
4444         unsigned int px;
4445         u64 clkcomp, cputm;
4446         int rc;
4447
4448         px = kvm_s390_get_prefix(vcpu);
4449         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4450                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4451                         return -EFAULT;
4452                 gpa = 0;
4453         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4454                 if (write_guest_real(vcpu, 163, &archmode, 1))
4455                         return -EFAULT;
4456                 gpa = px;
4457         } else
4458                 gpa -= __LC_FPREGS_SAVE_AREA;
4459
4460         /* manually convert vector registers if necessary */
4461         if (MACHINE_HAS_VX) {
4462                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4463                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4464                                      fprs, 128);
4465         } else {
4466                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4467                                      vcpu->run->s.regs.fprs, 128);
4468         }
4469         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4470                               vcpu->run->s.regs.gprs, 128);
4471         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4472                               &vcpu->arch.sie_block->gpsw, 16);
4473         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4474                               &px, 4);
4475         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4476                               &vcpu->run->s.regs.fpc, 4);
4477         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4478                               &vcpu->arch.sie_block->todpr, 4);
4479         cputm = kvm_s390_get_cpu_timer(vcpu);
4480         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4481                               &cputm, 8);
4482         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4483         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4484                               &clkcomp, 8);
4485         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4486                               &vcpu->run->s.regs.acrs, 64);
4487         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4488                               &vcpu->arch.sie_block->gcr, 128);
4489         return rc ? -EFAULT : 0;
4490 }
4491
4492 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4493 {
4494         /*
4495          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4496          * switch in the run ioctl. Let's update our copies before we save
4497  * them into the save area
4498          */
4499         save_fpu_regs();
4500         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4501         save_access_regs(vcpu->run->s.regs.acrs);
4502
4503         return kvm_s390_store_status_unloaded(vcpu, addr);
4504 }
4505
4506 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4507 {
4508         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4509         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4510 }
4511
4512 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4513 {
4514         unsigned long i;
4515         struct kvm_vcpu *vcpu;
4516
4517         kvm_for_each_vcpu(i, vcpu, kvm) {
4518                 __disable_ibs_on_vcpu(vcpu);
4519         }
4520 }
4521
4522 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4523 {
4524         if (!sclp.has_ibs)
4525                 return;
4526         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4527         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4528 }
4529
4530 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4531 {
4532         int i, online_vcpus, r = 0, started_vcpus = 0;
4533
4534         if (!is_vcpu_stopped(vcpu))
4535                 return 0;
4536
4537         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4538         /* Only one cpu at a time may enter/leave the STOPPED state. */
4539         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4540         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4541
4542         /* Let's tell the UV that we want to change into the operating state */
4543         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4544                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4545                 if (r) {
4546                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4547                         return r;
4548                 }
4549         }
4550
4551         for (i = 0; i < online_vcpus; i++) {
4552                 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4553                         started_vcpus++;
4554         }
4555
4556         if (started_vcpus == 0) {
4557                 /* we're the only active VCPU -> speed it up */
4558                 __enable_ibs_on_vcpu(vcpu);
4559         } else if (started_vcpus == 1) {
4560                 /*
4561                  * As we are starting a second VCPU, we have to disable
4562                  * the IBS facility on all VCPUs to remove potentially
4563                  * outstanding ENABLE requests.
4564                  */
4565                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4566         }
4567
4568         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4569         /*
4570          * The real PSW might have changed due to a RESTART interpreted by the
4571          * ultravisor. We block all interrupts and let the next sie exit
4572          * refresh our view.
4573          */
4574         if (kvm_s390_pv_cpu_is_protected(vcpu))
4575                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4576         /*
4577          * Another VCPU might have used IBS while we were offline.
4578          * Let's play safe and flush the VCPU at startup.
4579          */
4580         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4581         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4582         return 0;
4583 }
4584
4585 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4586 {
4587         int i, online_vcpus, r = 0, started_vcpus = 0;
4588         struct kvm_vcpu *started_vcpu = NULL;
4589
4590         if (is_vcpu_stopped(vcpu))
4591                 return 0;
4592
4593         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4594         /* Only one cpu at a time may enter/leave the STOPPED state. */
4595         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4596         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4597
4598         /* Let's tell the UV that we want to change into the stopped state */
4599         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4600                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4601                 if (r) {
4602                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4603                         return r;
4604                 }
4605         }
4606
4607         /*
4608          * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4609          * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4610          * have been fully processed. This will ensure that the VCPU
4611          * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4612          */
4613         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4614         kvm_s390_clear_stop_irq(vcpu);
4615
4616         __disable_ibs_on_vcpu(vcpu);
4617
4618         for (i = 0; i < online_vcpus; i++) {
4619                 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4620
4621                 if (!is_vcpu_stopped(tmp)) {
4622                         started_vcpus++;
4623                         started_vcpu = tmp;
4624                 }
4625         }
4626
4627         if (started_vcpus == 1) {
4628                 /*
4629                  * As we only have one VCPU left, we want to enable the
4630                  * IBS facility for that VCPU to speed it up.
4631                  */
4632                 __enable_ibs_on_vcpu(started_vcpu);
4633         }
4634
4635         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4636         return 0;
4637 }
4638
4639 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4640                                      struct kvm_enable_cap *cap)
4641 {
4642         int r;
4643
4644         if (cap->flags)
4645                 return -EINVAL;
4646
4647         switch (cap->cap) {
4648         case KVM_CAP_S390_CSS_SUPPORT:
4649                 if (!vcpu->kvm->arch.css_support) {
4650                         vcpu->kvm->arch.css_support = 1;
4651                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4652                         trace_kvm_s390_enable_css(vcpu->kvm);
4653                 }
4654                 r = 0;
4655                 break;
4656         default:
4657                 r = -EINVAL;
4658                 break;
4659         }
4660         return r;
4661 }
4662
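/*
 * Handle KVM_S390_MEMOP_SIDA_READ/WRITE; only valid for protected vcpus,
 * whose memory cannot be accessed with the regular memop path below.
 */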
4663 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4664                                    struct kvm_s390_mem_op *mop)
4665 {
4666         void __user *uaddr = (void __user *)mop->buf;
4667         int r = 0;
4668
4669         if (mop->flags || !mop->size)
4670                 return -EINVAL;
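        /* reject size/offset combinations that would overflow */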
4671         if (mop->size + mop->sida_offset < mop->size)
4672                 return -EINVAL;
4673         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4674                 return -E2BIG;
4675         if (!kvm_s390_pv_cpu_is_protected(vcpu))
4676                 return -EINVAL;
4677
4678         switch (mop->op) {
4679         case KVM_S390_MEMOP_SIDA_READ:
4680                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4681                                  mop->sida_offset), mop->size))
4682                         r = -EFAULT;
4683
4684                 break;
4685         case KVM_S390_MEMOP_SIDA_WRITE:
4686                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4687                                    mop->sida_offset), uaddr, mop->size))
4688                         r = -EFAULT;
4689                 break;
4690         }
4691         return r;
4692 }

4693 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4694                                   struct kvm_s390_mem_op *mop)
4695 {
4696         void __user *uaddr = (void __user *)mop->buf;
4697         void *tmpbuf = NULL;
4698         int r = 0;
4699         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4700                                     | KVM_S390_MEMOP_F_CHECK_ONLY
4701                                     | KVM_S390_MEMOP_F_SKEY_PROTECTION;
4702
4703         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4704                 return -EINVAL;
4705         if (mop->size > MEM_OP_MAX_SIZE)
4706                 return -E2BIG;
4707         if (kvm_s390_pv_cpu_is_protected(vcpu))
4708                 return -EINVAL;
4709         if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
4710                 if (access_key_invalid(mop->key))
4711                         return -EINVAL;
4712         } else {
4713                 mop->key = 0;
4714         }
4715         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4716                 tmpbuf = vmalloc(mop->size);
4717                 if (!tmpbuf)
4718                         return -ENOMEM;
4719         }
4720
4721         switch (mop->op) {
4722         case KVM_S390_MEMOP_LOGICAL_READ:
4723                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4724                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4725                                             GACC_FETCH, mop->key);
4726                         break;
4727                 }
4728                 r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4729                                         mop->size, mop->key);
4730                 if (r == 0) {
4731                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4732                                 r = -EFAULT;
4733                 }
4734                 break;
4735         case KVM_S390_MEMOP_LOGICAL_WRITE:
4736                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4737                         r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
4738                                             GACC_STORE, mop->key);
4739                         break;
4740                 }
4741                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4742                         r = -EFAULT;
4743                         break;
4744                 }
4745                 r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
4746                                          mop->size, mop->key);
4747                 break;
4748         }
4749
4750         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4751                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4752
4753         vfree(tmpbuf);
4754         return r;
4755 }
4756
4757 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4758                                       struct kvm_s390_mem_op *mop)
4759 {
4760         int r, srcu_idx;
4761
4762         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4763
4764         switch (mop->op) {
4765         case KVM_S390_MEMOP_LOGICAL_READ:
4766         case KVM_S390_MEMOP_LOGICAL_WRITE:
4767                 r = kvm_s390_guest_mem_op(vcpu, mop);
4768                 break;
4769         case KVM_S390_MEMOP_SIDA_READ:
4770         case KVM_S390_MEMOP_SIDA_WRITE:
4771                 /* the vcpu->mutex protects us against the sida going away */
4772                 r = kvm_s390_guest_sida_op(vcpu, mop);
4773                 break;
4774         default:
4775                 r = -EINVAL;
4776         }
4777
4778         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4779         return r;
4780 }
4781
4782 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4783                                unsigned int ioctl, unsigned long arg)
4784 {
4785         struct kvm_vcpu *vcpu = filp->private_data;
4786         void __user *argp = (void __user *)arg;
4787
4788         switch (ioctl) {
4789         case KVM_S390_IRQ: {
4790                 struct kvm_s390_irq s390irq;
4791
4792                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4793                         return -EFAULT;
4794                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4795         }
4796         case KVM_S390_INTERRUPT: {
4797                 struct kvm_s390_interrupt s390int;
4798                 struct kvm_s390_irq s390irq = {};
4799
4800                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4801                         return -EFAULT;
4802                 if (s390int_to_s390irq(&s390int, &s390irq))
4803                         return -EINVAL;
4804                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4805         }
4806         }
4807         return -ENOIOCTLCMD;
4808 }
4809
4810 long kvm_arch_vcpu_ioctl(struct file *filp,
4811                          unsigned int ioctl, unsigned long arg)
4812 {
4813         struct kvm_vcpu *vcpu = filp->private_data;
4814         void __user *argp = (void __user *)arg;
4815         int idx;
4816         long r;
4817         u16 rc, rrc;
4818
4819         vcpu_load(vcpu);
4820
4821         switch (ioctl) {
4822         case KVM_S390_STORE_STATUS:
4823                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4824                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4825                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4826                 break;
4827         case KVM_S390_SET_INITIAL_PSW: {
4828                 psw_t psw;
4829
4830                 r = -EFAULT;
4831                 if (copy_from_user(&psw, argp, sizeof(psw)))
4832                         break;
4833                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4834                 break;
4835         }
4836         case KVM_S390_CLEAR_RESET:
4837                 r = 0;
4838                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4839                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4840                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4841                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4842                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4843                                    rc, rrc);
4844                 }
4845                 break;
4846         case KVM_S390_INITIAL_RESET:
4847                 r = 0;
4848                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4849                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4850                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4851                                           UVC_CMD_CPU_RESET_INITIAL,
4852                                           &rc, &rrc);
4853                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4854                                    rc, rrc);
4855                 }
4856                 break;
4857         case KVM_S390_NORMAL_RESET:
4858                 r = 0;
4859                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4860                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4861                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4862                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4863                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4864                                    rc, rrc);
4865                 }
4866                 break;
4867         case KVM_SET_ONE_REG:
4868         case KVM_GET_ONE_REG: {
4869                 struct kvm_one_reg reg;
4870                 r = -EINVAL;
4871                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4872                         break;
4873                 r = -EFAULT;
4874                 if (copy_from_user(&reg, argp, sizeof(reg)))
4875                         break;
4876                 if (ioctl == KVM_SET_ONE_REG)
4877                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4878                 else
4879                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4880                 break;
4881         }
4882 #ifdef CONFIG_KVM_S390_UCONTROL
4883         case KVM_S390_UCAS_MAP: {
4884                 struct kvm_s390_ucas_mapping ucasmap;
4885
4886                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4887                         r = -EFAULT;
4888                         break;
4889                 }
4890
4891                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4892                         r = -EINVAL;
4893                         break;
4894                 }
4895
4896                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4897                                      ucasmap.vcpu_addr, ucasmap.length);
4898                 break;
4899         }
4900         case KVM_S390_UCAS_UNMAP: {
4901                 struct kvm_s390_ucas_mapping ucasmap;
4902
4903                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4904                         r = -EFAULT;
4905                         break;
4906                 }
4907
4908                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4909                         r = -EINVAL;
4910                         break;
4911                 }
4912
4913                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4914                         ucasmap.length);
4915                 break;
4916         }
4917 #endif
4918         case KVM_S390_VCPU_FAULT: {
4919                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4920                 break;
4921         }
4922         case KVM_ENABLE_CAP:
4923         {
4924                 struct kvm_enable_cap cap;
4925                 r = -EFAULT;
4926                 if (copy_from_user(&cap, argp, sizeof(cap)))
4927                         break;
4928                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4929                 break;
4930         }
4931         case KVM_S390_MEM_OP: {
4932                 struct kvm_s390_mem_op mem_op;
4933
4934                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4935                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4936                 else
4937                         r = -EFAULT;
4938                 break;
4939         }
4940         case KVM_S390_SET_IRQ_STATE: {
4941                 struct kvm_s390_irq_state irq_state;
4942
4943                 r = -EFAULT;
4944                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4945                         break;
4946                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4947                     irq_state.len == 0 ||
4948                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4949                         r = -EINVAL;
4950                         break;
4951                 }
4952                 /* do not use irq_state.flags, it will break old QEMUs */
4953                 r = kvm_s390_set_irq_state(vcpu,
4954                                            (void __user *) irq_state.buf,
4955                                            irq_state.len);
4956                 break;
4957         }
4958         case KVM_S390_GET_IRQ_STATE: {
4959                 struct kvm_s390_irq_state irq_state;
4960
4961                 r = -EFAULT;
4962                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4963                         break;
4964                 if (irq_state.len == 0) {
4965                         r = -EINVAL;
4966                         break;
4967                 }
4968                 /* do not use irq_state.flags, it will break old QEMUs */
4969                 r = kvm_s390_get_irq_state(vcpu,
4970                                            (__u8 __user *)  irq_state.buf,
4971                                            irq_state.len);
4972                 break;
4973         }
4974         default:
4975                 r = -ENOTTY;
4976         }
4977
4978         vcpu_put(vcpu);
4979         return r;
4980 }
4981
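/*
 * For user-controlled (ucontrol) VMs the SIE control block can be mapped
 * into userspace at KVM_S390_SIE_PAGE_OFFSET; everything else gets SIGBUS.
 */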
4982 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4983 {
4984 #ifdef CONFIG_KVM_S390_UCONTROL
4985         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4986                  && (kvm_is_ucontrol(vcpu->kvm))) {
4987                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4988                 get_page(vmf->page);
4989                 return 0;
4990         }
4991 #endif
4992         return VM_FAULT_SIGBUS;
4993 }
4994
4995 /* Section: memory related */
4996 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4997                                    const struct kvm_memory_slot *old,
4998                                    struct kvm_memory_slot *new,
4999                                    enum kvm_mr_change change)
5000 {
5001         gpa_t size;
5002
5003         /* When we are protected, we should not change the memory slots */
5004         if (kvm_s390_pv_get_handle(kvm))
5005                 return -EINVAL;
5006
5007         if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
5008                 return 0;
5009
5010         /* A few sanity checks. Memory slots have to start and end at a segment
5011            boundary (1MB). The memory in userland may be fragmented into various
5012            different vmas. It is okay to mmap() and munmap() within this slot at
5013            any time after this call. */
5014
5015         if (new->userspace_addr & 0xffffful)
5016                 return -EINVAL;
5017
5018         size = new->npages * PAGE_SIZE;
5019         if (size & 0xffffful)
5020                 return -EINVAL;
5021
5022         if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5023                 return -EINVAL;
5024
5025         return 0;
5026 }
5027
5028 void kvm_arch_commit_memory_region(struct kvm *kvm,
5029                                 struct kvm_memory_slot *old,
5030                                 const struct kvm_memory_slot *new,
5031                                 enum kvm_mr_change change)
5032 {
5033         int rc = 0;
5034
5035         switch (change) {
5036         case KVM_MR_DELETE:
5037                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5038                                         old->npages * PAGE_SIZE);
5039                 break;
5040         case KVM_MR_MOVE:
5041                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5042                                         old->npages * PAGE_SIZE);
5043                 if (rc)
5044                         break;
5045                 fallthrough;
5046         case KVM_MR_CREATE:
5047                 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5048                                       new->base_gfn * PAGE_SIZE,
5049                                       new->npages * PAGE_SIZE);
5050                 break;
5051         case KVM_MR_FLAGS_ONLY:
5052                 break;
5053         default:
5054                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5055         }
5056         if (rc)
5057                 pr_warn("failed to commit memory region\n");
5058         return;
5059 }
5060
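/*
 * Mask of the facility bits in facility-list doubleword i that may be used
 * as base facilities for guests, derived from the SCLP hmfai field.
 */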
5061 static inline unsigned long nonhyp_mask(int i)
5062 {
5063         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5064
5065         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5066 }
5067
5068 static int __init kvm_s390_init(void)
5069 {
5070         int i;
5071
5072         if (!sclp.has_sief2) {
5073                 pr_info("SIE is not available\n");
5074                 return -ENODEV;
5075         }
5076
5077         if (nested && hpage) {
5078                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5079                 return -EINVAL;
5080         }
5081
5082         for (i = 0; i < 16; i++)
5083                 kvm_s390_fac_base[i] |=
5084                         stfle_fac_list[i] & nonhyp_mask(i);
5085
5086         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5087 }
5088
5089 static void __exit kvm_s390_exit(void)
5090 {
5091         kvm_exit();
5092 }
5093
5094 module_init(kvm_s390_init);
5095 module_exit(kvm_s390_exit);
5096
5097 /*
5098  * Enable autoloading of the kvm module.
5099  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5100  * since x86 takes a different approach.
5101  */
5102 #include <linux/miscdevice.h>
5103 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5104 MODULE_ALIAS("devname:kvm");