arch/s390/kvm/kvm-s390.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * hosting IBM Z kernel virtual machines (s390x)
   4  *
   5  * Copyright IBM Corp. 2008, 2020
   6  *
   7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
   8  *               Christian Borntraeger <borntraeger@de.ibm.com>
   9  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  10  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  11  *               Jason J. Herne <jjherne@us.ibm.com>
  12  */
  13
  14 #define KMSG_COMPONENT "kvm-s390"
  15 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/mman.h>
  25 #include <linux/module.h>
  26 #include <linux/moduleparam.h>
  27 #include <linux/random.h>
  28 #include <linux/slab.h>
  29 #include <linux/timer.h>
  30 #include <linux/vmalloc.h>
  31 #include <linux/bitmap.h>
  32 #include <linux/sched/signal.h>
  33 #include <linux/string.h>
  34 #include <linux/pgtable.h>
  35
  36 #include <asm/asm-offsets.h>
  37 #include <asm/lowcore.h>
  38 #include <asm/stp.h>
  39 #include <asm/gmap.h>
  40 #include <asm/nmi.h>
  41 #include <asm/switch_to.h>
  42 #include <asm/isc.h>
  43 #include <asm/sclp.h>
  44 #include <asm/cpacf.h>
  45 #include <asm/timex.h>
  46 #include <asm/ap.h>
  47 #include <asm/uv.h>
  48 #include <asm/fpu/api.h>
  49 #include "kvm-s390.h"
  50 #include "gaccess.h"
  51
  52 #define CREATE_TRACE_POINTS
  53 #include "trace.h"
  54 #include "trace-s390.h"
  55
/* Also the value reported to user space for KVM_CAP_S390_MEM_OP. */
#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of per-vcpu local irq slots - confirm */
#define LOCAL_IRQS 32
/* Byte size of (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries. */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))
  60
/*
 * Descriptors of the VM-wide statistics: the generic KVM VM stats followed
 * by the s390 specific injection counters.
 */
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
        KVM_GENERIC_VM_STATS(),
        STATS_DESC_COUNTER(VM, inject_io),
        STATS_DESC_COUNTER(VM, inject_float_mchk),
        STATS_DESC_COUNTER(VM, inject_pfault_done),
        STATS_DESC_COUNTER(VM, inject_service_signal),
        STATS_DESC_COUNTER(VM, inject_virtio)
};
  69
/*
 * Header for the VM statistics. The offsets describe the layout
 * header | id string (KVM_STATS_NAME_SIZE bytes) | descriptors | data.
 */
const struct kvm_stats_header kvm_vm_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vm_stats_desc),
};
  78
/*
 * Descriptors of the per-VCPU statistics: the generic KVM VCPU stats
 * followed by the s390 specific exit, delivery, injection and instruction
 * counters.
 */
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
        KVM_GENERIC_VCPU_STATS(),
        STATS_DESC_COUNTER(VCPU, exit_userspace),
        STATS_DESC_COUNTER(VCPU, exit_null),
        STATS_DESC_COUNTER(VCPU, exit_external_request),
        STATS_DESC_COUNTER(VCPU, exit_io_request),
        STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
        STATS_DESC_COUNTER(VCPU, exit_stop_request),
        STATS_DESC_COUNTER(VCPU, exit_validity),
        STATS_DESC_COUNTER(VCPU, exit_instruction),
        STATS_DESC_COUNTER(VCPU, exit_pei),
        STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
        STATS_DESC_COUNTER(VCPU, instruction_lctl),
        STATS_DESC_COUNTER(VCPU, instruction_lctlg),
        STATS_DESC_COUNTER(VCPU, instruction_stctl),
        STATS_DESC_COUNTER(VCPU, instruction_stctg),
        STATS_DESC_COUNTER(VCPU, exit_program_interruption),
        STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
        STATS_DESC_COUNTER(VCPU, exit_operation_exception),
        STATS_DESC_COUNTER(VCPU, deliver_ckc),
        STATS_DESC_COUNTER(VCPU, deliver_cputm),
        STATS_DESC_COUNTER(VCPU, deliver_external_call),
        STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
        STATS_DESC_COUNTER(VCPU, deliver_service_signal),
        STATS_DESC_COUNTER(VCPU, deliver_virtio),
        STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
        STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
        STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
        STATS_DESC_COUNTER(VCPU, deliver_program),
        STATS_DESC_COUNTER(VCPU, deliver_io),
        STATS_DESC_COUNTER(VCPU, deliver_machine_check),
        STATS_DESC_COUNTER(VCPU, exit_wait_state),
        STATS_DESC_COUNTER(VCPU, inject_ckc),
        STATS_DESC_COUNTER(VCPU, inject_cputm),
        STATS_DESC_COUNTER(VCPU, inject_external_call),
        STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
        STATS_DESC_COUNTER(VCPU, inject_mchk),
        STATS_DESC_COUNTER(VCPU, inject_pfault_init),
        STATS_DESC_COUNTER(VCPU, inject_program),
        STATS_DESC_COUNTER(VCPU, inject_restart),
        STATS_DESC_COUNTER(VCPU, inject_set_prefix),
        STATS_DESC_COUNTER(VCPU, inject_stop_signal),
        STATS_DESC_COUNTER(VCPU, instruction_epsw),
        STATS_DESC_COUNTER(VCPU, instruction_gs),
        STATS_DESC_COUNTER(VCPU, instruction_io_other),
        STATS_DESC_COUNTER(VCPU, instruction_lpsw),
        STATS_DESC_COUNTER(VCPU, instruction_lpswe),
        STATS_DESC_COUNTER(VCPU, instruction_pfmf),
        STATS_DESC_COUNTER(VCPU, instruction_ptff),
        STATS_DESC_COUNTER(VCPU, instruction_sck),
        STATS_DESC_COUNTER(VCPU, instruction_sckpf),
        STATS_DESC_COUNTER(VCPU, instruction_stidp),
        STATS_DESC_COUNTER(VCPU, instruction_spx),
        STATS_DESC_COUNTER(VCPU, instruction_stpx),
        STATS_DESC_COUNTER(VCPU, instruction_stap),
        STATS_DESC_COUNTER(VCPU, instruction_iske),
        STATS_DESC_COUNTER(VCPU, instruction_ri),
        STATS_DESC_COUNTER(VCPU, instruction_rrbe),
        STATS_DESC_COUNTER(VCPU, instruction_sske),
        STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
        STATS_DESC_COUNTER(VCPU, instruction_stsi),
        STATS_DESC_COUNTER(VCPU, instruction_stfl),
        STATS_DESC_COUNTER(VCPU, instruction_tb),
        STATS_DESC_COUNTER(VCPU, instruction_tpi),
        STATS_DESC_COUNTER(VCPU, instruction_tprot),
        STATS_DESC_COUNTER(VCPU, instruction_tsch),
        STATS_DESC_COUNTER(VCPU, instruction_sie),
        STATS_DESC_COUNTER(VCPU, instruction_essa),
        STATS_DESC_COUNTER(VCPU, instruction_sthyi),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
        STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
        STATS_DESC_COUNTER(VCPU, diag_9c_forward),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
        STATS_DESC_COUNTER(VCPU, pfault_sync)
};
 175
/*
 * Header for the VCPU statistics. The offsets describe the layout
 * header | id string (KVM_STATS_NAME_SIZE bytes) | descriptors | data.
 */
const struct kvm_stats_header kvm_vcpu_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vcpu_stats_desc),
};
 184
 185 /* allow nested virtualization in KVM (if enabled by user space) */
 186 static int nested;
 187 module_param(nested, int, S_IRUGO);
 188 MODULE_PARM_DESC(nested, "Nested virtualization support");
 189
 190 /* allow 1m huge page guest backing, if !nested */
 191 static int hpage;
 192 module_param(hpage, int, 0444);
 193 MODULE_PARM_DESC(hpage, "1m huge page backing support");
 194
 195 /* maximum percentage of steal time for polling.  >100 is treated like 100 */
 196 static u8 halt_poll_max_steal = 10;
 197 module_param(halt_poll_max_steal, byte, 0644);
 198 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 199
 200 /* if set to true, the GISA will be initialized and used if available */
 201 static bool use_gisa  = true;
 202 module_param(use_gisa, bool, 0644);
 203 MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
 204
 205 /* maximum diag9c forwarding per second */
 206 unsigned int diag9c_forwarding_hz;
 207 module_param(diag9c_forwarding_hz, uint, 0644);
 208 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 209
 210 /*
 211  * For now we handle at most 16 double words as this is what the s390 base
 212  * kernel handles and stores in the prefix page. If we ever need to go beyond
 213  * this, this requires changes to code, but the external uapi can stay.
 214  */
/* Number of unsigned long facility words KVM handles internally. */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 * Array elements not covered by the initializer are zero.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 * Array elements not covered by the initializer are zero.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
 227
 228 static unsigned long kvm_s390_fac_size(void)
 229 {
 230         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
 231         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
 232         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
 233                 sizeof(stfle_fac_list));
 234
 235         return SIZE_INTERNAL;
 236 }
 237
 238 /* available cpu features supported by kvm */
/* available cpu features supported by kvm; filled by allow_cpu_feat() */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

/* pte notifiers, (un)registered in kvm_arch_hardware_setup()/_unsetup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug areas "kvm-trace" and "kvm-uv", registered in kvm_arch_init() */
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;
 247
 248 /* Section: not file related */
 249 int kvm_arch_hardware_enable(void)
 250 {
 251         /* every s390 is virtualization enabled ;-) */
 252         return 0;
 253 }
 254
 255 int kvm_arch_check_processor_compat(void *opaque)
 256 {
 257         return 0;
 258 }
 259
 260 /* forward declarations */
 261 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 262                               unsigned long end);
 263 static int sca_switch_to_extended(struct kvm *kvm);
 264
 265 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 266 {
 267         u8 delta_idx = 0;
 268
 269         /*
 270          * The TOD jumps by delta, we have to compensate this by adding
 271          * -delta to the epoch.
 272          */
 273         delta = -delta;
 274
 275         /* sign-extension - we're adding to signed values below */
 276         if ((s64)delta < 0)
 277                 delta_idx = -1;
 278
 279         scb->epoch += delta;
 280         if (scb->ecd & ECD_MEF) {
 281                 scb->epdx += delta_idx;
 282                 if (scb->epoch < delta)
 283                         scb->epdx += 1;
 284         }
 285 }
 286
 287 /*
 288  * This callback is executed during stop_machine(). All CPUs are therefore
 289  * temporarily stopped. In order not to change guest behavior, we have to
 290  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 291  * so a CPU won't be stopped while calculating with the epoch.
 292  */
 293 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 294                           void *v)
 295 {
 296         struct kvm *kvm;
 297         struct kvm_vcpu *vcpu;
 298         unsigned long i;
 299         unsigned long long *delta = v;
 300
 301         list_for_each_entry(kvm, &vm_list, vm_list) {
 302                 kvm_for_each_vcpu(i, vcpu, kvm) {
 303                         kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
 304                         if (i == 0) {
 305                                 kvm->arch.epoch = vcpu->arch.sie_block->epoch;
 306                                 kvm->arch.epdx = vcpu->arch.sie_block->epdx;
 307                         }
 308                         if (vcpu->arch.cputm_enabled)
 309                                 vcpu->arch.cputm_start += *delta;
 310                         if (vcpu->arch.vsie_block)
 311                                 kvm_clock_sync_scb(vcpu->arch.vsie_block,
 312                                                    *delta);
 313                 }
 314         }
 315         return NOTIFY_OK;
 316 }
 317
/* Hooked into s390_epoch_delta_notifier by kvm_arch_hardware_setup(). */
static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};
 321
 322 int kvm_arch_hardware_setup(void *opaque)
 323 {
 324         gmap_notifier.notifier_call = kvm_gmap_notifier;
 325         gmap_register_pte_notifier(&gmap_notifier);
 326         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
 327         gmap_register_pte_notifier(&vsie_gmap_notifier);
 328         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 329                                        &kvm_clock_notifier);
 330         return 0;
 331 }
 332
 333 void kvm_arch_hardware_unsetup(void)
 334 {
 335         gmap_unregister_pte_notifier(&gmap_notifier);
 336         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
 337         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 338                                          &kvm_clock_notifier);
 339 }
 340
 341 static void allow_cpu_feat(unsigned long nr)
 342 {
 343         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 344 }
 345
/*
 * Issue PLO with function code (@nr | 0x100) in register 0 and report the
 * outcome: returns 1 if the resulting condition code is 0, otherwise 0.
 * NOTE(review): 0x100 presumably selects the "test bit" form - confirm.
 */
static inline int plo_test_bit(unsigned char nr)
{
        unsigned long function = (unsigned long)nr | 0x100;
        int cc;

        asm volatile(
                "       lgr     0,%[function]\n"
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                /* extract the condition code into cc */
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : [function] "d" (function)
                : "cc", "0");
        return cc == 0;
}
 362
/*
 * Execute the RRF format instruction @opcode with register 0 set to 0 and
 * register 1 set to the address @query.
 * NOTE(review): presumably function code 0 is the query function and the
 * result is stored at @query (hence the "memory" clobber) - confirm.
 */
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
        asm volatile(
                "       lghi    0,0\n"
                "       lgr     1,%[query]\n"
                /* Parameter registers are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
                :
                : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
                : "cc", "memory", "0", "1");
}
 374
/* Opcodes handed to __insn32_query() by kvm_s390_cpu_feat_init(). */
#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939
 377
 378 static void kvm_s390_cpu_feat_init(void)
 379 {
 380         int i;
 381
 382         for (i = 0; i < 256; ++i) {
 383                 if (plo_test_bit(i))
 384                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 385         }
 386
 387         if (test_facility(28)) /* TOD-clock steering */
 388                 ptff(kvm_s390_available_subfunc.ptff,
 389                      sizeof(kvm_s390_available_subfunc.ptff),
 390                      PTFF_QAF);
 391
 392         if (test_facility(17)) { /* MSA */
 393                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
 394                               kvm_s390_available_subfunc.kmac);
 395                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
 396                               kvm_s390_available_subfunc.kmc);
 397                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
 398                               kvm_s390_available_subfunc.km);
 399                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
 400                               kvm_s390_available_subfunc.kimd);
 401                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
 402                               kvm_s390_available_subfunc.klmd);
 403         }
 404         if (test_facility(76)) /* MSA3 */
 405                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
 406                               kvm_s390_available_subfunc.pckmo);
 407         if (test_facility(77)) { /* MSA4 */
 408                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
 409                               kvm_s390_available_subfunc.kmctr);
 410                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
 411                               kvm_s390_available_subfunc.kmf);
 412                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
 413                               kvm_s390_available_subfunc.kmo);
 414                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
 415                               kvm_s390_available_subfunc.pcc);
 416         }
 417         if (test_facility(57)) /* MSA5 */
 418                 __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
 419                               kvm_s390_available_subfunc.ppno);
 420
 421         if (test_facility(146)) /* MSA8 */
 422                 __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
 423                               kvm_s390_available_subfunc.kma);
 424
 425         if (test_facility(155)) /* MSA9 */
 426                 __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
 427                               kvm_s390_available_subfunc.kdsa);
 428
 429         if (test_facility(150)) /* SORTL */
 430                 __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);
 431
 432         if (test_facility(151)) /* DFLTCC */
 433                 __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);
 434
 435         if (MACHINE_HAS_ESOP)
 436                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 437         /*
 438          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
 439          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
 440          */
 441         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
 442             !test_facility(3) || !nested)
 443                 return;
 444         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
 445         if (sclp.has_64bscao)
 446                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
 447         if (sclp.has_siif)
 448                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
 449         if (sclp.has_gpere)
 450                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
 451         if (sclp.has_gsls)
 452                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
 453         if (sclp.has_ib)
 454                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
 455         if (sclp.has_cei)
 456                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
 457         if (sclp.has_ibs)
 458                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
 459         if (sclp.has_kss)
 460                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
 461         /*
 462          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
 463          * all skey handling functions read/set the skey from the PGSTE
 464          * instead of the real storage key.
 465          *
 466          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
 467          * pages being detected as preserved although they are resident.
 468          *
 469          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
 470          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
 471          *
 472          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
 473          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
 474          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
 475          *
 476          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
 477          * cannot easily shadow the SCA because of the ipte lock.
 478          */
 479 }
 480
 481 int kvm_arch_init(void *opaque)
 482 {
 483         int rc = -ENOMEM;
 484
 485         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 486         if (!kvm_s390_dbf)
 487                 return -ENOMEM;
 488
 489         kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
 490         if (!kvm_s390_dbf_uv)
 491                 goto out;
 492
 493         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
 494             debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
 495                 goto out;
 496
 497         kvm_s390_cpu_feat_init();
 498
 499         /* Register floating interrupt controller interface. */
 500         rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 501         if (rc) {
 502                 pr_err("A FLIC registration call failed with rc=%d\n", rc);
 503                 goto out;
 504         }
 505
 506         rc = kvm_s390_gib_init(GAL_ISC);
 507         if (rc)
 508                 goto out;
 509
 510         return 0;
 511
 512 out:
 513         kvm_arch_exit();
 514         return rc;
 515 }
 516
 517 void kvm_arch_exit(void)
 518 {
 519         kvm_s390_gib_destroy();
 520         debug_unregister(kvm_s390_dbf);
 521         debug_unregister(kvm_s390_dbf_uv);
 522 }
 523
 524 /* Section: device related */
 525 long kvm_arch_dev_ioctl(struct file *filp,
 526                         unsigned int ioctl, unsigned long arg)
 527 {
 528         if (ioctl == KVM_S390_ENABLE_SIE)
 529                 return s390_enable_sie();
 530         return -EINVAL;
 531 }
 532
 533 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 534 {
 535         int r;
 536
 537         switch (ext) {
 538         case KVM_CAP_S390_PSW:
 539         case KVM_CAP_S390_GMAP:
 540         case KVM_CAP_SYNC_MMU:
 541 #ifdef CONFIG_KVM_S390_UCONTROL
 542         case KVM_CAP_S390_UCONTROL:
 543 #endif
 544         case KVM_CAP_ASYNC_PF:
 545         case KVM_CAP_SYNC_REGS:
 546         case KVM_CAP_ONE_REG:
 547         case KVM_CAP_ENABLE_CAP:
 548         case KVM_CAP_S390_CSS_SUPPORT:
 549         case KVM_CAP_IOEVENTFD:
 550         case KVM_CAP_DEVICE_CTRL:
 551         case KVM_CAP_S390_IRQCHIP:
 552         case KVM_CAP_VM_ATTRIBUTES:
 553         case KVM_CAP_MP_STATE:
 554         case KVM_CAP_IMMEDIATE_EXIT:
 555         case KVM_CAP_S390_INJECT_IRQ:
 556         case KVM_CAP_S390_USER_SIGP:
 557         case KVM_CAP_S390_USER_STSI:
 558         case KVM_CAP_S390_SKEYS:
 559         case KVM_CAP_S390_IRQ_STATE:
 560         case KVM_CAP_S390_USER_INSTR0:
 561         case KVM_CAP_S390_CMMA_MIGRATION:
 562         case KVM_CAP_S390_AIS:
 563         case KVM_CAP_S390_AIS_MIGRATION:
 564         case KVM_CAP_S390_VCPU_RESETS:
 565         case KVM_CAP_SET_GUEST_DEBUG:
 566         case KVM_CAP_S390_DIAG318:
 567                 r = 1;
 568                 break;
 569         case KVM_CAP_SET_GUEST_DEBUG2:
 570                 r = KVM_GUESTDBG_VALID_MASK;
 571                 break;
 572         case KVM_CAP_S390_HPAGE_1M:
 573                 r = 0;
 574                 if (hpage && !kvm_is_ucontrol(kvm))
 575                         r = 1;
 576                 break;
 577         case KVM_CAP_S390_MEM_OP:
 578                 r = MEM_OP_MAX_SIZE;
 579                 break;
 580         case KVM_CAP_NR_VCPUS:
 581         case KVM_CAP_MAX_VCPUS:
 582         case KVM_CAP_MAX_VCPU_ID:
 583                 r = KVM_S390_BSCA_CPU_SLOTS;
 584                 if (!kvm_s390_use_sca_entries())
 585                         r = KVM_MAX_VCPUS;
 586                 else if (sclp.has_esca && sclp.has_64bscao)
 587                         r = KVM_S390_ESCA_CPU_SLOTS;
 588                 if (ext == KVM_CAP_NR_VCPUS)
 589                         r = min_t(unsigned int, num_online_cpus(), r);
 590                 break;
 591         case KVM_CAP_S390_COW:
 592                 r = MACHINE_HAS_ESOP;
 593                 break;
 594         case KVM_CAP_S390_VECTOR_REGISTERS:
 595                 r = MACHINE_HAS_VX;
 596                 break;
 597         case KVM_CAP_S390_RI:
 598                 r = test_facility(64);
 599                 break;
 600         case KVM_CAP_S390_GS:
 601                 r = test_facility(133);
 602                 break;
 603         case KVM_CAP_S390_BPB:
 604                 r = test_facility(82);
 605                 break;
 606         case KVM_CAP_S390_PROTECTED:
 607                 r = is_prot_virt_host();
 608                 break;
 609         default:
 610                 r = 0;
 611         }
 612         return r;
 613 }
 614
 615 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 616 {
 617         int i;
 618         gfn_t cur_gfn, last_gfn;
 619         unsigned long gaddr, vmaddr;
 620         struct gmap *gmap = kvm->arch.gmap;
 621         DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);
 622
 623         /* Loop over all guest segments */
 624         cur_gfn = memslot->base_gfn;
 625         last_gfn = memslot->base_gfn + memslot->npages;
 626         for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
 627                 gaddr = gfn_to_gpa(cur_gfn);
 628                 vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
 629                 if (kvm_is_error_hva(vmaddr))
 630                         continue;
 631
 632                 bitmap_zero(bitmap, _PAGE_ENTRIES);
 633                 gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
 634                 for (i = 0; i < _PAGE_ENTRIES; i++) {
 635                         if (test_bit(i, bitmap))
 636                                 mark_page_dirty(kvm, cur_gfn + i);
 637                 }
 638
 639                 if (fatal_signal_pending(current))
 640                         return;
 641                 cond_resched();
 642         }
 643 }
 644
 645 /* Section: vm related */
 646 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 647
 648 /*
 649  * Get (and clear) the dirty memory log for a memory slot.
 650  */
 651 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 652                                struct kvm_dirty_log *log)
 653 {
 654         int r;
 655         unsigned long n;
 656         struct kvm_memory_slot *memslot;
 657         int is_dirty;
 658
 659         if (kvm_is_ucontrol(kvm))
 660                 return -EINVAL;
 661
 662         mutex_lock(&kvm->slots_lock);
 663
 664         r = -EINVAL;
 665         if (log->slot >= KVM_USER_MEM_SLOTS)
 666                 goto out;
 667
 668         r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 669         if (r)
 670                 goto out;
 671
 672         /* Clear the dirty log */
 673         if (is_dirty) {
 674                 n = kvm_dirty_bitmap_bytes(memslot);
 675                 memset(memslot->dirty_bitmap, 0, n);
 676         }
 677         r = 0;
 678 out:
 679         mutex_unlock(&kvm->slots_lock);
 680         return r;
 681 }
 682
 683 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
 684 {
 685         unsigned long i;
 686         struct kvm_vcpu *vcpu;
 687
 688         kvm_for_each_vcpu(i, vcpu, kvm) {
 689                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
 690         }
 691 }
 692
 693 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 694 {
 695         int r;
 696
 697         if (cap->flags)
 698                 return -EINVAL;
 699
 700         switch (cap->cap) {
 701         case KVM_CAP_S390_IRQCHIP:
 702                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 703                 kvm->arch.use_irqchip = 1;
 704                 r = 0;
 705                 break;
 706         case KVM_CAP_S390_USER_SIGP:
 707                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 708                 kvm->arch.user_sigp = 1;
 709                 r = 0;
 710                 break;
 711         case KVM_CAP_S390_VECTOR_REGISTERS:
 712                 mutex_lock(&kvm->lock);
 713                 if (kvm->created_vcpus) {
 714                         r = -EBUSY;
 715                 } else if (MACHINE_HAS_VX) {
 716                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 717                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 718                         if (test_facility(134)) {
 719                                 set_kvm_facility(kvm->arch.model.fac_mask, 134);
 720                                 set_kvm_facility(kvm->arch.model.fac_list, 134);
 721                         }
 722                         if (test_facility(135)) {
 723                                 set_kvm_facility(kvm->arch.model.fac_mask, 135);
 724                                 set_kvm_facility(kvm->arch.model.fac_list, 135);
 725                         }
 726                         if (test_facility(148)) {
 727                                 set_kvm_facility(kvm->arch.model.fac_mask, 148);
 728                                 set_kvm_facility(kvm->arch.model.fac_list, 148);
 729                         }
 730                         if (test_facility(152)) {
 731                                 set_kvm_facility(kvm->arch.model.fac_mask, 152);
 732                                 set_kvm_facility(kvm->arch.model.fac_list, 152);
 733                         }
 734                         if (test_facility(192)) {
 735                                 set_kvm_facility(kvm->arch.model.fac_mask, 192);
 736                                 set_kvm_facility(kvm->arch.model.fac_list, 192);
 737                         }
 738                         r = 0;
 739                 } else
 740                         r = -EINVAL;
 741                 mutex_unlock(&kvm->lock);
 742                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 743                          r ? "(not available)" : "(success)");
 744                 break;
 745         case KVM_CAP_S390_RI:
 746                 r = -EINVAL;
 747                 mutex_lock(&kvm->lock);
 748                 if (kvm->created_vcpus) {
 749                         r = -EBUSY;
 750                 } else if (test_facility(64)) {
 751                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 752                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 753                         r = 0;
 754                 }
 755                 mutex_unlock(&kvm->lock);
 756                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 757                          r ? "(not available)" : "(success)");
 758                 break;
 759         case KVM_CAP_S390_AIS:
 760                 mutex_lock(&kvm->lock);
 761                 if (kvm->created_vcpus) {
 762                         r = -EBUSY;
 763                 } else {
 764                         set_kvm_facility(kvm->arch.model.fac_mask, 72);
 765                         set_kvm_facility(kvm->arch.model.fac_list, 72);
 766                         r = 0;
 767                 }
 768                 mutex_unlock(&kvm->lock);
 769                 VM_EVENT(kvm, 3, "ENABLE: AIS %s",
 770                          r ? "(not available)" : "(success)");
 771                 break;
 772         case KVM_CAP_S390_GS:
 773                 r = -EINVAL;
 774                 mutex_lock(&kvm->lock);
 775                 if (kvm->created_vcpus) {
 776                         r = -EBUSY;
 777                 } else if (test_facility(133)) {
 778                         set_kvm_facility(kvm->arch.model.fac_mask, 133);
 779                         set_kvm_facility(kvm->arch.model.fac_list, 133);
 780                         r = 0;
 781                 }
 782                 mutex_unlock(&kvm->lock);
 783                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
 784                          r ? "(not available)" : "(success)");
 785                 break;
 786         case KVM_CAP_S390_HPAGE_1M:
 787                 mutex_lock(&kvm->lock);
 788                 if (kvm->created_vcpus)
 789                         r = -EBUSY;
 790                 else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
 791                         r = -EINVAL;
 792                 else {
 793                         r = 0;
 794                         mmap_write_lock(kvm->mm);
 795                         kvm->mm->context.allow_gmap_hpage_1m = 1;
 796                         mmap_write_unlock(kvm->mm);
 797                         /*
 798                          * We might have to create fake 4k page
 799                          * tables. To avoid that the hardware works on
 800                          * stale PGSTEs, we emulate these instructions.
 801                          */
 802                         kvm->arch.use_skf = 0;
 803                         kvm->arch.use_pfmfi = 0;
 804                 }
 805                 mutex_unlock(&kvm->lock);
 806                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
 807                          r ? "(not available)" : "(success)");
 808                 break;
 809         case KVM_CAP_S390_USER_STSI:
 810                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 811                 kvm->arch.user_stsi = 1;
 812                 r = 0;
 813                 break;
 814         case KVM_CAP_S390_USER_INSTR0:
 815                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
 816                 kvm->arch.user_instr0 = 1;
 817                 icpt_operexc_on_all_vcpus(kvm);
 818                 r = 0;
 819                 break;
 820         default:
 821                 r = -EINVAL;
 822                 break;
 823         }
 824         return r;
 825 }
 826
 827 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 828 {
 829         int ret;
 830
 831         switch (attr->attr) {
 832         case KVM_S390_VM_MEM_LIMIT_SIZE:
 833                 ret = 0;
 834                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 835                          kvm->arch.mem_limit);
 836                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 837                         ret = -EFAULT;
 838                 break;
 839         default:
 840                 ret = -ENXIO;
 841                 break;
 842         }
 843         return ret;
 844 }
 845
 846 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 847 {
 848         int ret;
 849         unsigned int idx;
 850         switch (attr->attr) {
 851         case KVM_S390_VM_MEM_ENABLE_CMMA:
 852                 ret = -ENXIO;
 853                 if (!sclp.has_cmma)
 854                         break;
 855
 856                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 857                 mutex_lock(&kvm->lock);
 858                 if (kvm->created_vcpus)
 859                         ret = -EBUSY;
 860                 else if (kvm->mm->context.allow_gmap_hpage_1m)
 861                         ret = -EINVAL;
 862                 else {
 863                         kvm->arch.use_cmma = 1;
 864                         /* Not compatible with cmma. */
 865                         kvm->arch.use_pfmfi = 0;
 866                         ret = 0;
 867                 }
 868                 mutex_unlock(&kvm->lock);
 869                 break;
 870         case KVM_S390_VM_MEM_CLR_CMMA:
 871                 ret = -ENXIO;
 872                 if (!sclp.has_cmma)
 873                         break;
 874                 ret = -EINVAL;
 875                 if (!kvm->arch.use_cmma)
 876                         break;
 877
 878                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 879                 mutex_lock(&kvm->lock);
 880                 idx = srcu_read_lock(&kvm->srcu);
 881                 s390_reset_cmma(kvm->arch.gmap->mm);
 882                 srcu_read_unlock(&kvm->srcu, idx);
 883                 mutex_unlock(&kvm->lock);
 884                 ret = 0;
 885                 break;
 886         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 887                 unsigned long new_limit;
 888
 889                 if (kvm_is_ucontrol(kvm))
 890                         return -EINVAL;
 891
 892                 if (get_user(new_limit, (u64 __user *)attr->addr))
 893                         return -EFAULT;
 894
 895                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 896                     new_limit > kvm->arch.mem_limit)
 897                         return -E2BIG;
 898
 899                 if (!new_limit)
 900                         return -EINVAL;
 901
 902                 /* gmap_create takes last usable address */
 903                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 904                         new_limit -= 1;
 905
 906                 ret = -EBUSY;
 907                 mutex_lock(&kvm->lock);
 908                 if (!kvm->created_vcpus) {
 909                         /* gmap_create will round the limit up */
 910                         struct gmap *new = gmap_create(current->mm, new_limit);
 911
 912                         if (!new) {
 913                                 ret = -ENOMEM;
 914                         } else {
 915                                 gmap_remove(kvm->arch.gmap);
 916                                 new->private = kvm;
 917                                 kvm->arch.gmap = new;
 918                                 ret = 0;
 919                         }
 920                 }
 921                 mutex_unlock(&kvm->lock);
 922                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 923                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 924                          (void *) kvm->arch.gmap->asce);
 925                 break;
 926         }
 927         default:
 928                 ret = -ENXIO;
 929                 break;
 930         }
 931         return ret;
 932 }
 933
 934 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 935
 936 void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
 937 {
 938         struct kvm_vcpu *vcpu;
 939         unsigned long i;
 940
 941         kvm_s390_vcpu_block_all(kvm);
 942
 943         kvm_for_each_vcpu(i, vcpu, kvm) {
 944                 kvm_s390_vcpu_crypto_setup(vcpu);
 945                 /* recreate the shadow crycb by leaving the VSIE handler */
 946                 kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
 947         }
 948
 949         kvm_s390_vcpu_unblock_all(kvm);
 950 }
 951
 952 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 953 {
 954         mutex_lock(&kvm->lock);
 955         switch (attr->attr) {
 956         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 957                 if (!test_kvm_facility(kvm, 76)) {
 958                         mutex_unlock(&kvm->lock);
 959                         return -EINVAL;
 960                 }
 961                 get_random_bytes(
 962                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 963                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 964                 kvm->arch.crypto.aes_kw = 1;
 965                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 966                 break;
 967         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 968                 if (!test_kvm_facility(kvm, 76)) {
 969                         mutex_unlock(&kvm->lock);
 970                         return -EINVAL;
 971                 }
 972                 get_random_bytes(
 973                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 974                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 975                 kvm->arch.crypto.dea_kw = 1;
 976                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 977                 break;
 978         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 979                 if (!test_kvm_facility(kvm, 76)) {
 980                         mutex_unlock(&kvm->lock);
 981                         return -EINVAL;
 982                 }
 983                 kvm->arch.crypto.aes_kw = 0;
 984                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 985                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 986                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 987                 break;
 988         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 989                 if (!test_kvm_facility(kvm, 76)) {
 990                         mutex_unlock(&kvm->lock);
 991                         return -EINVAL;
 992                 }
 993                 kvm->arch.crypto.dea_kw = 0;
 994                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 995                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 996                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 997                 break;
 998         case KVM_S390_VM_CRYPTO_ENABLE_APIE:
 999                 if (!ap_instructions_available()) {
1000                         mutex_unlock(&kvm->lock);
1001                         return -EOPNOTSUPP;
1002                 }
1003                 kvm->arch.crypto.apie = 1;
1004                 break;
1005         case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1006                 if (!ap_instructions_available()) {
1007                         mutex_unlock(&kvm->lock);
1008                         return -EOPNOTSUPP;
1009                 }
1010                 kvm->arch.crypto.apie = 0;
1011                 break;
1012         default:
1013                 mutex_unlock(&kvm->lock);
1014                 return -ENXIO;
1015         }
1016
1017         kvm_s390_vcpu_crypto_reset_all(kvm);
1018         mutex_unlock(&kvm->lock);
1019         return 0;
1020 }
1021
1022 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
1023 {
1024         unsigned long cx;
1025         struct kvm_vcpu *vcpu;
1026
1027         kvm_for_each_vcpu(cx, vcpu, kvm)
1028                 kvm_s390_sync_request(req, vcpu);
1029 }
1030
1031 /*
1032  * Must be called with kvm->srcu held to avoid races on memslots, and with
1033  * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
1034  */
1035 static int kvm_s390_vm_start_migration(struct kvm *kvm)
1036 {
1037         struct kvm_memory_slot *ms;
1038         struct kvm_memslots *slots;
1039         unsigned long ram_pages = 0;
1040         int bkt;
1041
1042         /* migration mode already enabled */
1043         if (kvm->arch.migration_mode)
1044                 return 0;
1045         slots = kvm_memslots(kvm);
1046         if (!slots || kvm_memslots_empty(slots))
1047                 return -EINVAL;
1048
1049         if (!kvm->arch.use_cmma) {
1050                 kvm->arch.migration_mode = 1;
1051                 return 0;
1052         }
1053         /* mark all the pages in active slots as dirty */
1054         kvm_for_each_memslot(ms, bkt, slots) {
1055                 if (!ms->dirty_bitmap)
1056                         return -EINVAL;
1057                 /*
1058                  * The second half of the bitmap is only used on x86,
1059                  * and would be wasted otherwise, so we put it to good
1060                  * use here to keep track of the state of the storage
1061                  * attributes.
1062                  */
1063                 memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
1064                 ram_pages += ms->npages;
1065         }
1066         atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
1067         kvm->arch.migration_mode = 1;
1068         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
1069         return 0;
1070 }
1071
1072 /*
1073  * Must be called with kvm->slots_lock to avoid races with ourselves and
1074  * kvm_s390_vm_start_migration.
1075  */
1076 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
1077 {
1078         /* migration mode already disabled */
1079         if (!kvm->arch.migration_mode)
1080                 return 0;
1081         kvm->arch.migration_mode = 0;
1082         if (kvm->arch.use_cmma)
1083                 kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
1084         return 0;
1085 }
1086
1087 static int kvm_s390_vm_set_migration(struct kvm *kvm,
1088                                      struct kvm_device_attr *attr)
1089 {
1090         int res = -ENXIO;
1091
1092         mutex_lock(&kvm->slots_lock);
1093         switch (attr->attr) {
1094         case KVM_S390_VM_MIGRATION_START:
1095                 res = kvm_s390_vm_start_migration(kvm);
1096                 break;
1097         case KVM_S390_VM_MIGRATION_STOP:
1098                 res = kvm_s390_vm_stop_migration(kvm);
1099                 break;
1100         default:
1101                 break;
1102         }
1103         mutex_unlock(&kvm->slots_lock);
1104
1105         return res;
1106 }
1107
1108 static int kvm_s390_vm_get_migration(struct kvm *kvm,
1109                                      struct kvm_device_attr *attr)
1110 {
1111         u64 mig = kvm->arch.migration_mode;
1112
1113         if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
1114                 return -ENXIO;
1115
1116         if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
1117                 return -EFAULT;
1118         return 0;
1119 }
1120
1121 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1122 {
1123         struct kvm_s390_vm_tod_clock gtod;
1124
1125         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
1126                 return -EFAULT;
1127
1128         if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
1129                 return -EINVAL;
1130         kvm_s390_set_tod_clock(kvm, &gtod);
1131
1132         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
1133                 gtod.epoch_idx, gtod.tod);
1134
1135         return 0;
1136 }
1137
1138 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1139 {
1140         u8 gtod_high;
1141
1142         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
1143                                            sizeof(gtod_high)))
1144                 return -EFAULT;
1145
1146         if (gtod_high != 0)
1147                 return -EINVAL;
1148         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
1149
1150         return 0;
1151 }
1152
1153 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1154 {
1155         struct kvm_s390_vm_tod_clock gtod = { 0 };
1156
1157         if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
1158                            sizeof(gtod.tod)))
1159                 return -EFAULT;
1160
1161         kvm_s390_set_tod_clock(kvm, &gtod);
1162         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
1163         return 0;
1164 }
1165
1166 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1167 {
1168         int ret;
1169
1170         if (attr->flags)
1171                 return -EINVAL;
1172
1173         switch (attr->attr) {
1174         case KVM_S390_VM_TOD_EXT:
1175                 ret = kvm_s390_set_tod_ext(kvm, attr);
1176                 break;
1177         case KVM_S390_VM_TOD_HIGH:
1178                 ret = kvm_s390_set_tod_high(kvm, attr);
1179                 break;
1180         case KVM_S390_VM_TOD_LOW:
1181                 ret = kvm_s390_set_tod_low(kvm, attr);
1182                 break;
1183         default:
1184                 ret = -ENXIO;
1185                 break;
1186         }
1187         return ret;
1188 }
1189
1190 static void kvm_s390_get_tod_clock(struct kvm *kvm,
1191                                    struct kvm_s390_vm_tod_clock *gtod)
1192 {
1193         union tod_clock clk;
1194
1195         preempt_disable();
1196
1197         store_tod_clock_ext(&clk);
1198
1199         gtod->tod = clk.tod + kvm->arch.epoch;
1200         gtod->epoch_idx = 0;
1201         if (test_kvm_facility(kvm, 139)) {
1202                 gtod->epoch_idx = clk.ei + kvm->arch.epdx;
1203                 if (gtod->tod < clk.tod)
1204                         gtod->epoch_idx += 1;
1205         }
1206
1207         preempt_enable();
1208 }
1209
1210 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
1211 {
1212         struct kvm_s390_vm_tod_clock gtod;
1213
1214         memset(&gtod, 0, sizeof(gtod));
1215         kvm_s390_get_tod_clock(kvm, &gtod);
1216         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1217                 return -EFAULT;
1218
1219         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
1220                 gtod.epoch_idx, gtod.tod);
1221         return 0;
1222 }
1223
1224 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
1225 {
1226         u8 gtod_high = 0;
1227
1228         if (copy_to_user((void __user *)attr->addr, &gtod_high,
1229                                          sizeof(gtod_high)))
1230                 return -EFAULT;
1231         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
1232
1233         return 0;
1234 }
1235
1236 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1237 {
1238         u64 gtod;
1239
1240         gtod = kvm_s390_get_tod_clock_fast(kvm);
1241         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1242                 return -EFAULT;
1243         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1244
1245         return 0;
1246 }
1247
1248 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1249 {
1250         int ret;
1251
1252         if (attr->flags)
1253                 return -EINVAL;
1254
1255         switch (attr->attr) {
1256         case KVM_S390_VM_TOD_EXT:
1257                 ret = kvm_s390_get_tod_ext(kvm, attr);
1258                 break;
1259         case KVM_S390_VM_TOD_HIGH:
1260                 ret = kvm_s390_get_tod_high(kvm, attr);
1261                 break;
1262         case KVM_S390_VM_TOD_LOW:
1263                 ret = kvm_s390_get_tod_low(kvm, attr);
1264                 break;
1265         default:
1266                 ret = -ENXIO;
1267                 break;
1268         }
1269         return ret;
1270 }
1271
1272 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1273 {
1274         struct kvm_s390_vm_cpu_processor *proc;
1275         u16 lowest_ibc, unblocked_ibc;
1276         int ret = 0;
1277
1278         mutex_lock(&kvm->lock);
1279         if (kvm->created_vcpus) {
1280                 ret = -EBUSY;
1281                 goto out;
1282         }
1283         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1284         if (!proc) {
1285                 ret = -ENOMEM;
1286                 goto out;
1287         }
1288         if (!copy_from_user(proc, (void __user *)attr->addr,
1289                             sizeof(*proc))) {
1290                 kvm->arch.model.cpuid = proc->cpuid;
1291                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1292                 unblocked_ibc = sclp.ibc & 0xfff;
1293                 if (lowest_ibc && proc->ibc) {
1294                         if (proc->ibc > unblocked_ibc)
1295                                 kvm->arch.model.ibc = unblocked_ibc;
1296                         else if (proc->ibc < lowest_ibc)
1297                                 kvm->arch.model.ibc = lowest_ibc;
1298                         else
1299                                 kvm->arch.model.ibc = proc->ibc;
1300                 }
1301                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1302                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1303                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1304                          kvm->arch.model.ibc,
1305                          kvm->arch.model.cpuid);
1306                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1307                          kvm->arch.model.fac_list[0],
1308                          kvm->arch.model.fac_list[1],
1309                          kvm->arch.model.fac_list[2]);
1310         } else
1311                 ret = -EFAULT;
1312         kfree(proc);
1313 out:
1314         mutex_unlock(&kvm->lock);
1315         return ret;
1316 }
1317
1318 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1319                                        struct kvm_device_attr *attr)
1320 {
1321         struct kvm_s390_vm_cpu_feat data;
1322
1323         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1324                 return -EFAULT;
1325         if (!bitmap_subset((unsigned long *) data.feat,
1326                            kvm_s390_available_cpu_feat,
1327                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1328                 return -EINVAL;
1329
1330         mutex_lock(&kvm->lock);
1331         if (kvm->created_vcpus) {
1332                 mutex_unlock(&kvm->lock);
1333                 return -EBUSY;
1334         }
1335         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1336                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1337         mutex_unlock(&kvm->lock);
1338         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1339                          data.feat[0],
1340                          data.feat[1],
1341                          data.feat[2]);
1342         return 0;
1343 }
1344
1345 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1346                                           struct kvm_device_attr *attr)
1347 {
1348         mutex_lock(&kvm->lock);
1349         if (kvm->created_vcpus) {
1350                 mutex_unlock(&kvm->lock);
1351                 return -EBUSY;
1352         }
1353
1354         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1355                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1356                 mutex_unlock(&kvm->lock);
1357                 return -EFAULT;
1358         }
1359         mutex_unlock(&kvm->lock);
1360
1361         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1362                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1363                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1364                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1366         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1369         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1375         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1378         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1381         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1384         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1387         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1390         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1392                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1393         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1394                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1395                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1396         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1397                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1398                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1399         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1400                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1401                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1402         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1403                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1404                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1405         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1406                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1407                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1408         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1409                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1410                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1411                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1412                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1413         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1414                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1415                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1416                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1417                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1418
1419         return 0;
1420 }
1421
1422 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1423 {
1424         int ret = -ENXIO;
1425
1426         switch (attr->attr) {
1427         case KVM_S390_VM_CPU_PROCESSOR:
1428                 ret = kvm_s390_set_processor(kvm, attr);
1429                 break;
1430         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1431                 ret = kvm_s390_set_processor_feat(kvm, attr);
1432                 break;
1433         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1434                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1435                 break;
1436         }
1437         return ret;
1438 }
1439
1440 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1441 {
1442         struct kvm_s390_vm_cpu_processor *proc;
1443         int ret = 0;
1444
1445         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1446         if (!proc) {
1447                 ret = -ENOMEM;
1448                 goto out;
1449         }
1450         proc->cpuid = kvm->arch.model.cpuid;
1451         proc->ibc = kvm->arch.model.ibc;
1452         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1453                S390_ARCH_FAC_LIST_SIZE_BYTE);
1454         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1455                  kvm->arch.model.ibc,
1456                  kvm->arch.model.cpuid);
1457         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1458                  kvm->arch.model.fac_list[0],
1459                  kvm->arch.model.fac_list[1],
1460                  kvm->arch.model.fac_list[2]);
1461         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1462                 ret = -EFAULT;
1463         kfree(proc);
1464 out:
1465         return ret;
1466 }
1467
1468 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1469 {
1470         struct kvm_s390_vm_cpu_machine *mach;
1471         int ret = 0;
1472
1473         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1474         if (!mach) {
1475                 ret = -ENOMEM;
1476                 goto out;
1477         }
1478         get_cpu_id((struct cpuid *) &mach->cpuid);
1479         mach->ibc = sclp.ibc;
1480         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1481                S390_ARCH_FAC_LIST_SIZE_BYTE);
1482         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1483                sizeof(stfle_fac_list));
1484         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1485                  kvm->arch.model.ibc,
1486                  kvm->arch.model.cpuid);
1487         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1488                  mach->fac_mask[0],
1489                  mach->fac_mask[1],
1490                  mach->fac_mask[2]);
1491         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1492                  mach->fac_list[0],
1493                  mach->fac_list[1],
1494                  mach->fac_list[2]);
1495         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1496                 ret = -EFAULT;
1497         kfree(mach);
1498 out:
1499         return ret;
1500 }
1501
1502 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1503                                        struct kvm_device_attr *attr)
1504 {
1505         struct kvm_s390_vm_cpu_feat data;
1506
1507         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1508                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1509         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1510                 return -EFAULT;
1511         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1512                          data.feat[0],
1513                          data.feat[1],
1514                          data.feat[2]);
1515         return 0;
1516 }
1517
1518 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1519                                      struct kvm_device_attr *attr)
1520 {
1521         struct kvm_s390_vm_cpu_feat data;
1522
1523         bitmap_copy((unsigned long *) data.feat,
1524                     kvm_s390_available_cpu_feat,
1525                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1526         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1527                 return -EFAULT;
1528         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1529                          data.feat[0],
1530                          data.feat[1],
1531                          data.feat[2]);
1532         return 0;
1533 }
1534
1535 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1536                                           struct kvm_device_attr *attr)
1537 {
1538         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1539             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1540                 return -EFAULT;
1541
1542         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1543                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1544                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1545                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1547         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1550         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1556         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1559         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1562         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1565         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1568         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1571         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1573                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1574         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1575                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1576                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1577         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1578                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1579                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1580         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1581                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1582                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1583         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1584                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1585                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1586         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1587                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1588                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1589         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1590                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1591                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1592                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1593                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1594         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1595                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1596                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1597                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1598                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1599
1600         return 0;
1601 }
1602
1603 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1604                                         struct kvm_device_attr *attr)
1605 {
1606         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1607             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1608                 return -EFAULT;
1609
1610         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1611                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1612                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1613                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1614                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1615         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1616                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1618         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1624         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1625                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1626                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1627         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1628                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1630         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1631                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1632                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1633         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1634                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1635                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1636         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1637                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1639         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1640                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1641                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1642         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1643                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1644                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1645         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1646                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1647                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1648         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1649                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1650                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1651         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1652                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1653                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1654         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1655                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1656                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1657         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1658                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1659                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1660                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1661                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1662         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1663                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1664                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1665                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1666                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1667
1668         return 0;
1669 }
1670
1671 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1672 {
1673         int ret = -ENXIO;
1674
1675         switch (attr->attr) {
1676         case KVM_S390_VM_CPU_PROCESSOR:
1677                 ret = kvm_s390_get_processor(kvm, attr);
1678                 break;
1679         case KVM_S390_VM_CPU_MACHINE:
1680                 ret = kvm_s390_get_machine(kvm, attr);
1681                 break;
1682         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1683                 ret = kvm_s390_get_processor_feat(kvm, attr);
1684                 break;
1685         case KVM_S390_VM_CPU_MACHINE_FEAT:
1686                 ret = kvm_s390_get_machine_feat(kvm, attr);
1687                 break;
1688         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1689                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1690                 break;
1691         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1692                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1693                 break;
1694         }
1695         return ret;
1696 }
1697
1698 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1699 {
1700         int ret;
1701
1702         switch (attr->group) {
1703         case KVM_S390_VM_MEM_CTRL:
1704                 ret = kvm_s390_set_mem_control(kvm, attr);
1705                 break;
1706         case KVM_S390_VM_TOD:
1707                 ret = kvm_s390_set_tod(kvm, attr);
1708                 break;
1709         case KVM_S390_VM_CPU_MODEL:
1710                 ret = kvm_s390_set_cpu_model(kvm, attr);
1711                 break;
1712         case KVM_S390_VM_CRYPTO:
1713                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1714                 break;
1715         case KVM_S390_VM_MIGRATION:
1716                 ret = kvm_s390_vm_set_migration(kvm, attr);
1717                 break;
1718         default:
1719                 ret = -ENXIO;
1720                 break;
1721         }
1722
1723         return ret;
1724 }
1725
1726 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1727 {
1728         int ret;
1729
1730         switch (attr->group) {
1731         case KVM_S390_VM_MEM_CTRL:
1732                 ret = kvm_s390_get_mem_control(kvm, attr);
1733                 break;
1734         case KVM_S390_VM_TOD:
1735                 ret = kvm_s390_get_tod(kvm, attr);
1736                 break;
1737         case KVM_S390_VM_CPU_MODEL:
1738                 ret = kvm_s390_get_cpu_model(kvm, attr);
1739                 break;
1740         case KVM_S390_VM_MIGRATION:
1741                 ret = kvm_s390_vm_get_migration(kvm, attr);
1742                 break;
1743         default:
1744                 ret = -ENXIO;
1745                 break;
1746         }
1747
1748         return ret;
1749 }
1750
1751 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1752 {
1753         int ret;
1754
1755         switch (attr->group) {
1756         case KVM_S390_VM_MEM_CTRL:
1757                 switch (attr->attr) {
1758                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1759                 case KVM_S390_VM_MEM_CLR_CMMA:
1760                         ret = sclp.has_cmma ? 0 : -ENXIO;
1761                         break;
1762                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1763                         ret = 0;
1764                         break;
1765                 default:
1766                         ret = -ENXIO;
1767                         break;
1768                 }
1769                 break;
1770         case KVM_S390_VM_TOD:
1771                 switch (attr->attr) {
1772                 case KVM_S390_VM_TOD_LOW:
1773                 case KVM_S390_VM_TOD_HIGH:
1774                         ret = 0;
1775                         break;
1776                 default:
1777                         ret = -ENXIO;
1778                         break;
1779                 }
1780                 break;
1781         case KVM_S390_VM_CPU_MODEL:
1782                 switch (attr->attr) {
1783                 case KVM_S390_VM_CPU_PROCESSOR:
1784                 case KVM_S390_VM_CPU_MACHINE:
1785                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1786                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1787                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1788                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1789                         ret = 0;
1790                         break;
1791                 default:
1792                         ret = -ENXIO;
1793                         break;
1794                 }
1795                 break;
1796         case KVM_S390_VM_CRYPTO:
1797                 switch (attr->attr) {
1798                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1799                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1800                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1801                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1802                         ret = 0;
1803                         break;
1804                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1805                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1806                         ret = ap_instructions_available() ? 0 : -ENXIO;
1807                         break;
1808                 default:
1809                         ret = -ENXIO;
1810                         break;
1811                 }
1812                 break;
1813         case KVM_S390_VM_MIGRATION:
1814                 ret = 0;
1815                 break;
1816         default:
1817                 ret = -ENXIO;
1818                 break;
1819         }
1820
1821         return ret;
1822 }
1823
1824 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1825 {
1826         uint8_t *keys;
1827         uint64_t hva;
1828         int srcu_idx, i, r = 0;
1829
1830         if (args->flags != 0)
1831                 return -EINVAL;
1832
1833         /* Is this guest using storage keys? */
1834         if (!mm_uses_skeys(current->mm))
1835                 return KVM_S390_GET_SKEYS_NONE;
1836
1837         /* Enforce sane limit on memory allocation */
1838         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1839                 return -EINVAL;
1840
1841         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1842         if (!keys)
1843                 return -ENOMEM;
1844
1845         mmap_read_lock(current->mm);
1846         srcu_idx = srcu_read_lock(&kvm->srcu);
1847         for (i = 0; i < args->count; i++) {
1848                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1849                 if (kvm_is_error_hva(hva)) {
1850                         r = -EFAULT;
1851                         break;
1852                 }
1853
1854                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1855                 if (r)
1856                         break;
1857         }
1858         srcu_read_unlock(&kvm->srcu, srcu_idx);
1859         mmap_read_unlock(current->mm);
1860
1861         if (!r) {
1862                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1863                                  sizeof(uint8_t) * args->count);
1864                 if (r)
1865                         r = -EFAULT;
1866         }
1867
1868         kvfree(keys);
1869         return r;
1870 }
1871
1872 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1873 {
1874         uint8_t *keys;
1875         uint64_t hva;
1876         int srcu_idx, i, r = 0;
1877         bool unlocked;
1878
1879         if (args->flags != 0)
1880                 return -EINVAL;
1881
1882         /* Enforce sane limit on memory allocation */
1883         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1884                 return -EINVAL;
1885
1886         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1887         if (!keys)
1888                 return -ENOMEM;
1889
1890         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1891                            sizeof(uint8_t) * args->count);
1892         if (r) {
1893                 r = -EFAULT;
1894                 goto out;
1895         }
1896
1897         /* Enable storage key handling for the guest */
1898         r = s390_enable_skey();
1899         if (r)
1900                 goto out;
1901
1902         i = 0;
1903         mmap_read_lock(current->mm);
1904         srcu_idx = srcu_read_lock(&kvm->srcu);
1905         while (i < args->count) {
1906                 unlocked = false;
1907                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1908                 if (kvm_is_error_hva(hva)) {
1909                         r = -EFAULT;
1910                         break;
1911                 }
1912
1913                 /* Lowest order bit is reserved */
1914                 if (keys[i] & 0x01) {
1915                         r = -EINVAL;
1916                         break;
1917                 }
1918
1919                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1920                 if (r) {
1921                         r = fixup_user_fault(current->mm, hva,
1922                                              FAULT_FLAG_WRITE, &unlocked);
1923                         if (r)
1924                                 break;
1925                 }
1926                 if (!r)
1927                         i++;
1928         }
1929         srcu_read_unlock(&kvm->srcu, srcu_idx);
1930         mmap_read_unlock(current->mm);
1931 out:
1932         kvfree(keys);
1933         return r;
1934 }
1935
/*
 * Every block handed to userspace starts with its base address and length.
 * Splitting a block therefore costs two longs of overhead, so a gap of clean
 * values is sent along as long as it is shorter than that.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* Use the same upper bound as the storage key interface, for consistency. */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1944
1945 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1946                               u8 *res, unsigned long bufsize)
1947 {
1948         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1949
1950         args->count = 0;
1951         while (args->count < bufsize) {
1952                 hva = gfn_to_hva(kvm, cur_gfn);
1953                 /*
1954                  * We return an error if the first value was invalid, but we
1955                  * return successfully if at least one value was copied.
1956                  */
1957                 if (kvm_is_error_hva(hva))
1958                         return args->count ? 0 : -EFAULT;
1959                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1960                         pgstev = 0;
1961                 res[args->count++] = (pgstev >> 24) & 0x43;
1962                 cur_gfn++;
1963         }
1964
1965         return 0;
1966 }
1967
1968 static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
1969                                                      gfn_t gfn)
1970 {
1971         return ____gfn_to_memslot(slots, gfn, true);
1972 }
1973
1974 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
1975                                               unsigned long cur_gfn)
1976 {
1977         struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
1978         unsigned long ofs = cur_gfn - ms->base_gfn;
1979         struct rb_node *mnode = &ms->gfn_node[slots->node_idx];
1980
1981         if (ms->base_gfn + ms->npages <= cur_gfn) {
1982                 mnode = rb_next(mnode);
1983                 /* If we are above the highest slot, wrap around */
1984                 if (!mnode)
1985                         mnode = rb_first(&slots->gfn_tree);
1986
1987                 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1988                 ofs = 0;
1989         }
1990         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
1991         while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
1992                 ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
1993                 ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
1994         }
1995         return ms->base_gfn + ofs;
1996 }
1997
1998 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1999                              u8 *res, unsigned long bufsize)
2000 {
2001         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2002         struct kvm_memslots *slots = kvm_memslots(kvm);
2003         struct kvm_memory_slot *ms;
2004
2005         if (unlikely(kvm_memslots_empty(slots)))
2006                 return 0;
2007
2008         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2009         ms = gfn_to_memslot(kvm, cur_gfn);
2010         args->count = 0;
2011         args->start_gfn = cur_gfn;
2012         if (!ms)
2013                 return 0;
2014         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2015         mem_end = kvm_s390_get_gfn_end(slots);
2016
2017         while (args->count < bufsize) {
2018                 hva = gfn_to_hva(kvm, cur_gfn);
2019                 if (kvm_is_error_hva(hva))
2020                         return 0;
2021                 /* Decrement only if we actually flipped the bit to 0 */
2022                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2023                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2024                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2025                         pgstev = 0;
2026                 /* Save the value */
2027                 res[args->count++] = (pgstev >> 24) & 0x43;
2028                 /* If the next bit is too far away, stop. */
2029                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2030                         return 0;
2031                 /* If we reached the previous "next", find the next one */
2032                 if (cur_gfn == next_gfn)
2033                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2034                 /* Reached the end of memory or of the buffer, stop */
2035                 if ((next_gfn >= mem_end) ||
2036                     (next_gfn - args->start_gfn >= bufsize))
2037                         return 0;
2038                 cur_gfn++;
2039                 /* Reached the end of the current memslot, take the next one. */
2040                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2041                         ms = gfn_to_memslot(kvm, cur_gfn);
2042                         if (!ms)
2043                                 return 0;
2044                 }
2045         }
2046         return 0;
2047 }
2048
2049 /*
2050  * This function searches for the next page with dirty CMMA attributes, and
2051  * saves the attributes in the buffer up to either the end of the buffer or
2052  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2053  * no trailing clean bytes are saved.
2054  * In case no dirty bits were found, or if CMMA was not enabled or used, the
2055  * output buffer will indicate 0 as length.
2056  */
2057 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2058                                   struct kvm_s390_cmma_log *args)
2059 {
2060         unsigned long bufsize;
2061         int srcu_idx, peek, ret;
2062         u8 *values;
2063
2064         if (!kvm->arch.use_cmma)
2065                 return -ENXIO;
2066         /* Invalid/unsupported flags were specified */
2067         if (args->flags & ~KVM_S390_CMMA_PEEK)
2068                 return -EINVAL;
2069         /* Migration mode query, and we are not doing a migration */
2070         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2071         if (!peek && !kvm->arch.migration_mode)
2072                 return -EINVAL;
2073         /* CMMA is disabled or was not used, or the buffer has length zero */
2074         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2075         if (!bufsize || !kvm->mm->context.uses_cmm) {
2076                 memset(args, 0, sizeof(*args));
2077                 return 0;
2078         }
2079         /* We are not peeking, and there are no dirty pages */
2080         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2081                 memset(args, 0, sizeof(*args));
2082                 return 0;
2083         }
2084
2085         values = vmalloc(bufsize);
2086         if (!values)
2087                 return -ENOMEM;
2088
2089         mmap_read_lock(kvm->mm);
2090         srcu_idx = srcu_read_lock(&kvm->srcu);
2091         if (peek)
2092                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2093         else
2094                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2095         srcu_read_unlock(&kvm->srcu, srcu_idx);
2096         mmap_read_unlock(kvm->mm);
2097
2098         if (kvm->arch.migration_mode)
2099                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2100         else
2101                 args->remaining = 0;
2102
2103         if (copy_to_user((void __user *)args->values, values, args->count))
2104                 ret = -EFAULT;
2105
2106         vfree(values);
2107         return ret;
2108 }
2109
2110 /*
2111  * This function sets the CMMA attributes for the given pages. If the input
2112  * buffer has zero length, no action is taken, otherwise the attributes are
2113  * set and the mm->context.uses_cmm flag is set.
2114  */
2115 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2116                                   const struct kvm_s390_cmma_log *args)
2117 {
2118         unsigned long hva, mask, pgstev, i;
2119         uint8_t *bits;
2120         int srcu_idx, r = 0;
2121
2122         mask = args->mask;
2123
2124         if (!kvm->arch.use_cmma)
2125                 return -ENXIO;
2126         /* invalid/unsupported flags */
2127         if (args->flags != 0)
2128                 return -EINVAL;
2129         /* Enforce sane limit on memory allocation */
2130         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2131                 return -EINVAL;
2132         /* Nothing to do */
2133         if (args->count == 0)
2134                 return 0;
2135
2136         bits = vmalloc(array_size(sizeof(*bits), args->count));
2137         if (!bits)
2138                 return -ENOMEM;
2139
2140         r = copy_from_user(bits, (void __user *)args->values, args->count);
2141         if (r) {
2142                 r = -EFAULT;
2143                 goto out;
2144         }
2145
2146         mmap_read_lock(kvm->mm);
2147         srcu_idx = srcu_read_lock(&kvm->srcu);
2148         for (i = 0; i < args->count; i++) {
2149                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2150                 if (kvm_is_error_hva(hva)) {
2151                         r = -EFAULT;
2152                         break;
2153                 }
2154
2155                 pgstev = bits[i];
2156                 pgstev = pgstev << 24;
2157                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2158                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2159         }
2160         srcu_read_unlock(&kvm->srcu, srcu_idx);
2161         mmap_read_unlock(kvm->mm);
2162
2163         if (!kvm->mm->context.uses_cmm) {
2164                 mmap_write_lock(kvm->mm);
2165                 kvm->mm->context.uses_cmm = 1;
2166                 mmap_write_unlock(kvm->mm);
2167         }
2168 out:
2169         vfree(bits);
2170         return r;
2171 }
2172
2173 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2174 {
2175         struct kvm_vcpu *vcpu;
2176         u16 rc, rrc;
2177         int ret = 0;
2178         unsigned long i;
2179
2180         /*
2181          * We ignore failures and try to destroy as many CPUs as possible.
2182          * At the same time we must not free the assigned resources when
2183          * this fails, as the ultravisor has still access to that memory.
2184          * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
2185          * behind.
2186          * We want to return the first failure rc and rrc, though.
2187          */
2188         kvm_for_each_vcpu(i, vcpu, kvm) {
2189                 mutex_lock(&vcpu->mutex);
2190                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2191                         *rcp = rc;
2192                         *rrcp = rrc;
2193                         ret = -EIO;
2194                 }
2195                 mutex_unlock(&vcpu->mutex);
2196         }
2197         return ret;
2198 }
2199
2200 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2201 {
2202         unsigned long i;
2203         int r = 0;
2204         u16 dummy;
2205
2206         struct kvm_vcpu *vcpu;
2207
2208         kvm_for_each_vcpu(i, vcpu, kvm) {
2209                 mutex_lock(&vcpu->mutex);
2210                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2211                 mutex_unlock(&vcpu->mutex);
2212                 if (r)
2213                         break;
2214         }
2215         if (r)
2216                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2217         return r;
2218 }
2219
2220 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2221 {
2222         int r = 0;
2223         u16 dummy;
2224         void __user *argp = (void __user *)cmd->data;
2225
2226         switch (cmd->cmd) {
2227         case KVM_PV_ENABLE: {
2228                 r = -EINVAL;
2229                 if (kvm_s390_pv_is_protected(kvm))
2230                         break;
2231
2232                 /*
2233                  *  FMT 4 SIE needs esca. As we never switch back to bsca from
2234                  *  esca, we need no cleanup in the error cases below
2235                  */
2236                 r = sca_switch_to_extended(kvm);
2237                 if (r)
2238                         break;
2239
2240                 mmap_write_lock(current->mm);
2241                 r = gmap_mark_unmergeable();
2242                 mmap_write_unlock(current->mm);
2243                 if (r)
2244                         break;
2245
2246                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2247                 if (r)
2248                         break;
2249
2250                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2251                 if (r)
2252                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2253
2254                 /* we need to block service interrupts from now on */
2255                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2256                 break;
2257         }
2258         case KVM_PV_DISABLE: {
2259                 r = -EINVAL;
2260                 if (!kvm_s390_pv_is_protected(kvm))
2261                         break;
2262
2263                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2264                 /*
2265                  * If a CPU could not be destroyed, destroy VM will also fail.
2266                  * There is no point in trying to destroy it. Instead return
2267                  * the rc and rrc from the first CPU that failed destroying.
2268                  */
2269                 if (r)
2270                         break;
2271                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2272
2273                 /* no need to block service interrupts any more */
2274                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2275                 break;
2276         }
2277         case KVM_PV_SET_SEC_PARMS: {
2278                 struct kvm_s390_pv_sec_parm parms = {};
2279                 void *hdr;
2280
2281                 r = -EINVAL;
2282                 if (!kvm_s390_pv_is_protected(kvm))
2283                         break;
2284
2285                 r = -EFAULT;
2286                 if (copy_from_user(&parms, argp, sizeof(parms)))
2287                         break;
2288
2289                 /* Currently restricted to 8KB */
2290                 r = -EINVAL;
2291                 if (parms.length > PAGE_SIZE * 2)
2292                         break;
2293
2294                 r = -ENOMEM;
2295                 hdr = vmalloc(parms.length);
2296                 if (!hdr)
2297                         break;
2298
2299                 r = -EFAULT;
2300                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2301                                     parms.length))
2302                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2303                                                       &cmd->rc, &cmd->rrc);
2304
2305                 vfree(hdr);
2306                 break;
2307         }
2308         case KVM_PV_UNPACK: {
2309                 struct kvm_s390_pv_unp unp = {};
2310
2311                 r = -EINVAL;
2312                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2313                         break;
2314
2315                 r = -EFAULT;
2316                 if (copy_from_user(&unp, argp, sizeof(unp)))
2317                         break;
2318
2319                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2320                                        &cmd->rc, &cmd->rrc);
2321                 break;
2322         }
2323         case KVM_PV_VERIFY: {
2324                 r = -EINVAL;
2325                 if (!kvm_s390_pv_is_protected(kvm))
2326                         break;
2327
2328                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2329                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2330                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2331                              cmd->rrc);
2332                 break;
2333         }
2334         case KVM_PV_PREP_RESET: {
2335                 r = -EINVAL;
2336                 if (!kvm_s390_pv_is_protected(kvm))
2337                         break;
2338
2339                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2340                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2341                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2342                              cmd->rc, cmd->rrc);
2343                 break;
2344         }
2345         case KVM_PV_UNSHARE_ALL: {
2346                 r = -EINVAL;
2347                 if (!kvm_s390_pv_is_protected(kvm))
2348                         break;
2349
2350                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2351                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2352                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2353                              cmd->rc, cmd->rrc);
2354                 break;
2355         }
2356         default:
2357                 r = -ENOTTY;
2358         }
2359         return r;
2360 }
2361
2362 long kvm_arch_vm_ioctl(struct file *filp,
2363                        unsigned int ioctl, unsigned long arg)
2364 {
2365         struct kvm *kvm = filp->private_data;
2366         void __user *argp = (void __user *)arg;
2367         struct kvm_device_attr attr;
2368         int r;
2369
2370         switch (ioctl) {
2371         case KVM_S390_INTERRUPT: {
2372                 struct kvm_s390_interrupt s390int;
2373
2374                 r = -EFAULT;
2375                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2376                         break;
2377                 r = kvm_s390_inject_vm(kvm, &s390int);
2378                 break;
2379         }
2380         case KVM_CREATE_IRQCHIP: {
2381                 struct kvm_irq_routing_entry routing;
2382
2383                 r = -EINVAL;
2384                 if (kvm->arch.use_irqchip) {
2385                         /* Set up dummy routing. */
2386                         memset(&routing, 0, sizeof(routing));
2387                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2388                 }
2389                 break;
2390         }
2391         case KVM_SET_DEVICE_ATTR: {
2392                 r = -EFAULT;
2393                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2394                         break;
2395                 r = kvm_s390_vm_set_attr(kvm, &attr);
2396                 break;
2397         }
2398         case KVM_GET_DEVICE_ATTR: {
2399                 r = -EFAULT;
2400                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2401                         break;
2402                 r = kvm_s390_vm_get_attr(kvm, &attr);
2403                 break;
2404         }
2405         case KVM_HAS_DEVICE_ATTR: {
2406                 r = -EFAULT;
2407                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2408                         break;
2409                 r = kvm_s390_vm_has_attr(kvm, &attr);
2410                 break;
2411         }
2412         case KVM_S390_GET_SKEYS: {
2413                 struct kvm_s390_skeys args;
2414
2415                 r = -EFAULT;
2416                 if (copy_from_user(&args, argp,
2417                                    sizeof(struct kvm_s390_skeys)))
2418                         break;
2419                 r = kvm_s390_get_skeys(kvm, &args);
2420                 break;
2421         }
2422         case KVM_S390_SET_SKEYS: {
2423                 struct kvm_s390_skeys args;
2424
2425                 r = -EFAULT;
2426                 if (copy_from_user(&args, argp,
2427                                    sizeof(struct kvm_s390_skeys)))
2428                         break;
2429                 r = kvm_s390_set_skeys(kvm, &args);
2430                 break;
2431         }
2432         case KVM_S390_GET_CMMA_BITS: {
2433                 struct kvm_s390_cmma_log args;
2434
2435                 r = -EFAULT;
2436                 if (copy_from_user(&args, argp, sizeof(args)))
2437                         break;
2438                 mutex_lock(&kvm->slots_lock);
2439                 r = kvm_s390_get_cmma_bits(kvm, &args);
2440                 mutex_unlock(&kvm->slots_lock);
2441                 if (!r) {
2442                         r = copy_to_user(argp, &args, sizeof(args));
2443                         if (r)
2444                                 r = -EFAULT;
2445                 }
2446                 break;
2447         }
2448         case KVM_S390_SET_CMMA_BITS: {
2449                 struct kvm_s390_cmma_log args;
2450
2451                 r = -EFAULT;
2452                 if (copy_from_user(&args, argp, sizeof(args)))
2453                         break;
2454                 mutex_lock(&kvm->slots_lock);
2455                 r = kvm_s390_set_cmma_bits(kvm, &args);
2456                 mutex_unlock(&kvm->slots_lock);
2457                 break;
2458         }
2459         case KVM_S390_PV_COMMAND: {
2460                 struct kvm_pv_cmd args;
2461
2462                 /* protvirt means user cpu state */
2463                 kvm_s390_set_user_cpu_state_ctrl(kvm);
2464                 r = 0;
2465                 if (!is_prot_virt_host()) {
2466                         r = -EINVAL;
2467                         break;
2468                 }
2469                 if (copy_from_user(&args, argp, sizeof(args))) {
2470                         r = -EFAULT;
2471                         break;
2472                 }
2473                 if (args.flags) {
2474                         r = -EINVAL;
2475                         break;
2476                 }
2477                 mutex_lock(&kvm->lock);
2478                 r = kvm_s390_handle_pv(kvm, &args);
2479                 mutex_unlock(&kvm->lock);
2480                 if (copy_to_user(argp, &args, sizeof(args))) {
2481                         r = -EFAULT;
2482                         break;
2483                 }
2484                 break;
2485         }
2486         default:
2487                 r = -ENOTTY;
2488         }
2489
2490         return r;
2491 }
2492
2493 static int kvm_s390_apxa_installed(void)
2494 {
2495         struct ap_config_info info;
2496
2497         if (ap_instructions_available()) {
2498                 if (ap_qci(&info) == 0)
2499                         return info.apxa;
2500         }
2501
2502         return 0;
2503 }
2504
2505 /*
2506  * The format of the crypto control block (CRYCB) is specified in the 3 low
2507  * order bits of the CRYCB designation (CRYCBD) field as follows:
2508  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2509  *           AP extended addressing (APXA) facility are installed.
2510  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2511  * Format 2: Both the APXA and MSAX3 facilities are installed
2512  */
2513 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2514 {
2515         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2516
2517         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2518         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2519
2520         /* Check whether MSAX3 is installed */
2521         if (!test_kvm_facility(kvm, 76))
2522                 return;
2523
2524         if (kvm_s390_apxa_installed())
2525                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2526         else
2527                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2528 }
2529
2530 /*
2531  * kvm_arch_crypto_set_masks
2532  *
2533  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2534  *       to be set.
2535  * @apm: the mask identifying the accessible AP adapters
2536  * @aqm: the mask identifying the accessible AP domains
2537  * @adm: the mask identifying the accessible AP control domains
2538  *
2539  * Set the masks that identify the adapters, domains and control domains to
2540  * which the KVM guest is granted access.
2541  *
2542  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2543  *       function.
2544  */
2545 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2546                                unsigned long *aqm, unsigned long *adm)
2547 {
2548         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2549
2550         kvm_s390_vcpu_block_all(kvm);
2551
2552         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2553         case CRYCB_FORMAT2: /* APCB1 use 256 bits */
2554                 memcpy(crycb->apcb1.apm, apm, 32);
2555                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2556                          apm[0], apm[1], apm[2], apm[3]);
2557                 memcpy(crycb->apcb1.aqm, aqm, 32);
2558                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2559                          aqm[0], aqm[1], aqm[2], aqm[3]);
2560                 memcpy(crycb->apcb1.adm, adm, 32);
2561                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2562                          adm[0], adm[1], adm[2], adm[3]);
2563                 break;
2564         case CRYCB_FORMAT1:
2565         case CRYCB_FORMAT0: /* Fall through both use APCB0 */
2566                 memcpy(crycb->apcb0.apm, apm, 8);
2567                 memcpy(crycb->apcb0.aqm, aqm, 2);
2568                 memcpy(crycb->apcb0.adm, adm, 2);
2569                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2570                          apm[0], *((unsigned short *)aqm),
2571                          *((unsigned short *)adm));
2572                 break;
2573         default:        /* Can not happen */
2574                 break;
2575         }
2576
2577         /* recreate the shadow crycb for each vcpu */
2578         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2579         kvm_s390_vcpu_unblock_all(kvm);
2580 }
2581 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2582
2583 /*
2584  * kvm_arch_crypto_clear_masks
2585  *
2586  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2587  *       to be cleared.
2588  *
2589  * Clear the masks that identify the adapters, domains and control domains to
2590  * which the KVM guest is granted access.
2591  *
2592  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2593  *       function.
2594  */
2595 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2596 {
2597         kvm_s390_vcpu_block_all(kvm);
2598
2599         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2600                sizeof(kvm->arch.crypto.crycb->apcb0));
2601         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2602                sizeof(kvm->arch.crypto.crycb->apcb1));
2603
2604         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2605         /* recreate the shadow crycb for each vcpu */
2606         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2607         kvm_s390_vcpu_unblock_all(kvm);
2608 }
2609 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2610
2611 static u64 kvm_s390_get_initial_cpuid(void)
2612 {
2613         struct cpuid cpuid;
2614
2615         get_cpu_id(&cpuid);
2616         cpuid.version = 0xff;
2617         return *((u64 *) &cpuid);
2618 }
2619
2620 static void kvm_s390_crypto_init(struct kvm *kvm)
2621 {
2622         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2623         kvm_s390_set_crycb_format(kvm);
2624         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2625
2626         if (!test_kvm_facility(kvm, 76))
2627                 return;
2628
2629         /* Enable AES/DEA protected key functions by default */
2630         kvm->arch.crypto.aes_kw = 1;
2631         kvm->arch.crypto.dea_kw = 1;
2632         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2633                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2634         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2635                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2636 }
2637
2638 static void sca_dispose(struct kvm *kvm)
2639 {
2640         if (kvm->arch.use_esca)
2641                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2642         else
2643                 free_page((unsigned long)(kvm->arch.sca));
2644         kvm->arch.sca = NULL;
2645 }
2646
2647 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2648 {
2649         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2650         int i, rc;
2651         char debug_name[16];
2652         static unsigned long sca_offset;
2653
2654         rc = -EINVAL;
2655 #ifdef CONFIG_KVM_S390_UCONTROL
2656         if (type & ~KVM_VM_S390_UCONTROL)
2657                 goto out_err;
2658         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2659                 goto out_err;
2660 #else
2661         if (type)
2662                 goto out_err;
2663 #endif
2664
2665         rc = s390_enable_sie();
2666         if (rc)
2667                 goto out_err;
2668
2669         rc = -ENOMEM;
2670
2671         if (!sclp.has_64bscao)
2672                 alloc_flags |= GFP_DMA;
2673         rwlock_init(&kvm->arch.sca_lock);
2674         /* start with basic SCA */
2675         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2676         if (!kvm->arch.sca)
2677                 goto out_err;
2678         mutex_lock(&kvm_lock);
2679         sca_offset += 16;
2680         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2681                 sca_offset = 0;
2682         kvm->arch.sca = (struct bsca_block *)
2683                         ((char *) kvm->arch.sca + sca_offset);
2684         mutex_unlock(&kvm_lock);
2685
2686         sprintf(debug_name, "kvm-%u", current->pid);
2687
2688         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2689         if (!kvm->arch.dbf)
2690                 goto out_err;
2691
2692         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2693         kvm->arch.sie_page2 =
2694              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2695         if (!kvm->arch.sie_page2)
2696                 goto out_err;
2697
2698         kvm->arch.sie_page2->kvm = kvm;
2699         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2700
2701         for (i = 0; i < kvm_s390_fac_size(); i++) {
2702                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2703                                               (kvm_s390_fac_base[i] |
2704                                                kvm_s390_fac_ext[i]);
2705                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2706                                               kvm_s390_fac_base[i];
2707         }
2708         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2709
2710         /* we are always in czam mode - even on pre z14 machines */
2711         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2712         set_kvm_facility(kvm->arch.model.fac_list, 138);
2713         /* we emulate STHYI in kvm */
2714         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2715         set_kvm_facility(kvm->arch.model.fac_list, 74);
2716         if (MACHINE_HAS_TLB_GUEST) {
2717                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2718                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2719         }
2720
2721         if (css_general_characteristics.aiv && test_facility(65))
2722                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2723
2724         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2725         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2726
2727         kvm_s390_crypto_init(kvm);
2728
2729         mutex_init(&kvm->arch.float_int.ais_lock);
2730         spin_lock_init(&kvm->arch.float_int.lock);
2731         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2732                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2733         init_waitqueue_head(&kvm->arch.ipte_wq);
2734         mutex_init(&kvm->arch.ipte_mutex);
2735
2736         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2737         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2738
2739         if (type & KVM_VM_S390_UCONTROL) {
2740                 kvm->arch.gmap = NULL;
2741                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2742         } else {
2743                 if (sclp.hamax == U64_MAX)
2744                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2745                 else
2746                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2747                                                     sclp.hamax + 1);
2748                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2749                 if (!kvm->arch.gmap)
2750                         goto out_err;
2751                 kvm->arch.gmap->private = kvm;
2752                 kvm->arch.gmap->pfault_enabled = 0;
2753         }
2754
2755         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2756         kvm->arch.use_skf = sclp.has_skey;
2757         spin_lock_init(&kvm->arch.start_stop_lock);
2758         kvm_s390_vsie_init(kvm);
2759         if (use_gisa)
2760                 kvm_s390_gisa_init(kvm);
2761         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2762
2763         return 0;
2764 out_err:
2765         free_page((unsigned long)kvm->arch.sie_page2);
2766         debug_unregister(kvm->arch.dbf);
2767         sca_dispose(kvm);
2768         KVM_EVENT(3, "creation of vm failed: %d", rc);
2769         return rc;
2770 }
2771
2772 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2773 {
2774         u16 rc, rrc;
2775
2776         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2777         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2778         kvm_s390_clear_local_irqs(vcpu);
2779         kvm_clear_async_pf_completion_queue(vcpu);
2780         if (!kvm_is_ucontrol(vcpu->kvm))
2781                 sca_del_vcpu(vcpu);
2782
2783         if (kvm_is_ucontrol(vcpu->kvm))
2784                 gmap_remove(vcpu->arch.gmap);
2785
2786         if (vcpu->kvm->arch.use_cmma)
2787                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2788         /* We can not hold the vcpu mutex here, we are already dying */
2789         if (kvm_s390_pv_cpu_get_handle(vcpu))
2790                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2791         free_page((unsigned long)(vcpu->arch.sie_block));
2792 }
2793
2794 void kvm_arch_destroy_vm(struct kvm *kvm)
2795 {
2796         u16 rc, rrc;
2797
2798         kvm_destroy_vcpus(kvm);
2799         sca_dispose(kvm);
2800         kvm_s390_gisa_destroy(kvm);
2801         /*
2802          * We are already at the end of life and kvm->lock is not taken.
2803          * This is ok as the file descriptor is closed by now and nobody
2804          * can mess with the pv state. To avoid lockdep_assert_held from
2805          * complaining we do not use kvm_s390_pv_is_protected.
2806          */
2807         if (kvm_s390_pv_get_handle(kvm))
2808                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2809         debug_unregister(kvm->arch.dbf);
2810         free_page((unsigned long)kvm->arch.sie_page2);
2811         if (!kvm_is_ucontrol(kvm))
2812                 gmap_remove(kvm->arch.gmap);
2813         kvm_s390_destroy_adapters(kvm);
2814         kvm_s390_clear_float_irqs(kvm);
2815         kvm_s390_vsie_destroy(kvm);
2816         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2817 }
2818
2819 /* Section: vcpu related */
2820 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2821 {
2822         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2823         if (!vcpu->arch.gmap)
2824                 return -ENOMEM;
2825         vcpu->arch.gmap->private = vcpu->kvm;
2826
2827         return 0;
2828 }
2829
2830 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2831 {
2832         if (!kvm_s390_use_sca_entries())
2833                 return;
2834         read_lock(&vcpu->kvm->arch.sca_lock);
2835         if (vcpu->kvm->arch.use_esca) {
2836                 struct esca_block *sca = vcpu->kvm->arch.sca;
2837
2838                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2839                 sca->cpu[vcpu->vcpu_id].sda = 0;
2840         } else {
2841                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2842
2843                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2844                 sca->cpu[vcpu->vcpu_id].sda = 0;
2845         }
2846         read_unlock(&vcpu->kvm->arch.sca_lock);
2847 }
2848
2849 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2850 {
2851         if (!kvm_s390_use_sca_entries()) {
2852                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2853
2854                 /* we still need the basic sca for the ipte control */
2855                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2856                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2857                 return;
2858         }
2859         read_lock(&vcpu->kvm->arch.sca_lock);
2860         if (vcpu->kvm->arch.use_esca) {
2861                 struct esca_block *sca = vcpu->kvm->arch.sca;
2862
2863                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2864                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2865                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2866                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2867                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2868         } else {
2869                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2870
2871                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2872                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2873                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2874                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2875         }
2876         read_unlock(&vcpu->kvm->arch.sca_lock);
2877 }
2878
2879 /* Basic SCA to Extended SCA data copy routines */
2880 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2881 {
2882         d->sda = s->sda;
2883         d->sigp_ctrl.c = s->sigp_ctrl.c;
2884         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2885 }
2886
2887 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2888 {
2889         int i;
2890
2891         d->ipte_control = s->ipte_control;
2892         d->mcn[0] = s->mcn;
2893         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2894                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2895 }
2896
2897 static int sca_switch_to_extended(struct kvm *kvm)
2898 {
2899         struct bsca_block *old_sca = kvm->arch.sca;
2900         struct esca_block *new_sca;
2901         struct kvm_vcpu *vcpu;
2902         unsigned long vcpu_idx;
2903         u32 scaol, scaoh;
2904
2905         if (kvm->arch.use_esca)
2906                 return 0;
2907
2908         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2909         if (!new_sca)
2910                 return -ENOMEM;
2911
2912         scaoh = (u32)((u64)(new_sca) >> 32);
2913         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2914
2915         kvm_s390_vcpu_block_all(kvm);
2916         write_lock(&kvm->arch.sca_lock);
2917
2918         sca_copy_b_to_e(new_sca, old_sca);
2919
2920         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2921                 vcpu->arch.sie_block->scaoh = scaoh;
2922                 vcpu->arch.sie_block->scaol = scaol;
2923                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2924         }
2925         kvm->arch.sca = new_sca;
2926         kvm->arch.use_esca = 1;
2927
2928         write_unlock(&kvm->arch.sca_lock);
2929         kvm_s390_vcpu_unblock_all(kvm);
2930
2931         free_page((unsigned long)old_sca);
2932
2933         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2934                  old_sca, kvm->arch.sca);
2935         return 0;
2936 }
2937
2938 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2939 {
2940         int rc;
2941
2942         if (!kvm_s390_use_sca_entries()) {
2943                 if (id < KVM_MAX_VCPUS)
2944                         return true;
2945                 return false;
2946         }
2947         if (id < KVM_S390_BSCA_CPU_SLOTS)
2948                 return true;
2949         if (!sclp.has_esca || !sclp.has_64bscao)
2950                 return false;
2951
2952         mutex_lock(&kvm->lock);
2953         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2954         mutex_unlock(&kvm->lock);
2955
2956         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2957 }
2958
2959 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2960 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2961 {
2962         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2963         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2964         vcpu->arch.cputm_start = get_tod_clock_fast();
2965         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2966 }
2967
2968 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2969 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2970 {
2971         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2972         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2973         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2974         vcpu->arch.cputm_start = 0;
2975         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2976 }
2977
2978 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2979 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2980 {
2981         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2982         vcpu->arch.cputm_enabled = true;
2983         __start_cpu_timer_accounting(vcpu);
2984 }
2985
2986 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2987 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2988 {
2989         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2990         __stop_cpu_timer_accounting(vcpu);
2991         vcpu->arch.cputm_enabled = false;
2992 }
2993
/* Preemption safe wrapper around __enable_cpu_timer_accounting() */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3000
/* Preemption safe wrapper around __disable_cpu_timer_accounting() */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
3007
3008 /* set the cpu timer - may only be called from the VCPU thread itself */
3009 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3010 {
3011         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3012         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3013         if (vcpu->arch.cputm_enabled)
3014                 vcpu->arch.cputm_start = get_tod_clock_fast();
3015         vcpu->arch.sie_block->cputm = cputm;
3016         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3017         preempt_enable();
3018 }
3019
3020 /* update and get the cpu timer - can also be called from other VCPU threads */
3021 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3022 {
3023         unsigned int seq;
3024         __u64 value;
3025
3026         if (unlikely(!vcpu->arch.cputm_enabled))
3027                 return vcpu->arch.sie_block->cputm;
3028
3029         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3030         do {
3031                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3032                 /*
3033                  * If the writer would ever execute a read in the critical
3034                  * section, e.g. in irq context, we have a deadlock.
3035                  */
3036                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3037                 value = vcpu->arch.sie_block->cputm;
3038                 /* if cputm_start is 0, accounting is being started/stopped */
3039                 if (likely(vcpu->arch.cputm_start))
3040                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3041         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3042         preempt_enable();
3043         return value;
3044 }
3045
3046 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3047 {
3048
3049         gmap_enable(vcpu->arch.enabled_gmap);
3050         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3051         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3052                 __start_cpu_timer_accounting(vcpu);
3053         vcpu->cpu = cpu;
3054 }
3055
3056 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3057 {
3058         vcpu->cpu = -1;
3059         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3060                 __stop_cpu_timer_accounting(vcpu);
3061         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3062         vcpu->arch.enabled_gmap = gmap_get_enabled();
3063         gmap_disable(vcpu->arch.enabled_gmap);
3064
3065 }
3066
3067 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3068 {
3069         mutex_lock(&vcpu->kvm->lock);
3070         preempt_disable();
3071         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3072         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3073         preempt_enable();
3074         mutex_unlock(&vcpu->kvm->lock);
3075         if (!kvm_is_ucontrol(vcpu->kvm)) {
3076                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3077                 sca_add_vcpu(vcpu);
3078         }
3079         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3080                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3081         /* make vcpu_load load the right gmap on the first trigger */
3082         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3083 }
3084
3085 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3086 {
3087         if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3088             test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
3089                 return true;
3090         return false;
3091 }
3092
3093 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3094 {
3095         /* At least one ECC subfunction must be present */
3096         return kvm_has_pckmo_subfunc(kvm, 32) ||
3097                kvm_has_pckmo_subfunc(kvm, 33) ||
3098                kvm_has_pckmo_subfunc(kvm, 34) ||
3099                kvm_has_pckmo_subfunc(kvm, 40) ||
3100                kvm_has_pckmo_subfunc(kvm, 41);
3101
3102 }
3103
3104 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3105 {
3106         /*
3107          * If the AP instructions are not being interpreted and the MSAX3
3108          * facility is not configured for the guest, there is nothing to set up.
3109          */
3110         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3111                 return;
3112
3113         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3114         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3115         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3116         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3117
3118         if (vcpu->kvm->arch.crypto.apie)
3119                 vcpu->arch.sie_block->eca |= ECA_APIE;
3120
3121         /* Set up protected key support */
3122         if (vcpu->kvm->arch.crypto.aes_kw) {
3123                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3124                 /* ecc is also wrapped with AES key */
3125                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3126                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3127         }
3128
3129         if (vcpu->kvm->arch.crypto.dea_kw)
3130                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3131 }
3132
3133 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3134 {
3135         free_page(vcpu->arch.sie_block->cbrlo);
3136         vcpu->arch.sie_block->cbrlo = 0;
3137 }
3138
3139 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3140 {
3141         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3142         if (!vcpu->arch.sie_block->cbrlo)
3143                 return -ENOMEM;
3144         return 0;
3145 }
3146
3147 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3148 {
3149         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3150
3151         vcpu->arch.sie_block->ibc = model->ibc;
3152         if (test_kvm_facility(vcpu->kvm, 7))
3153                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3154 }
3155
3156 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3157 {
3158         int rc = 0;
3159         u16 uvrc, uvrrc;
3160
3161         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3162                                                     CPUSTAT_SM |
3163                                                     CPUSTAT_STOPPED);
3164
3165         if (test_kvm_facility(vcpu->kvm, 78))
3166                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3167         else if (test_kvm_facility(vcpu->kvm, 8))
3168                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3169
3170         kvm_s390_vcpu_setup_model(vcpu);
3171
3172         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3173         if (MACHINE_HAS_ESOP)
3174                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3175         if (test_kvm_facility(vcpu->kvm, 9))
3176                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3177         if (test_kvm_facility(vcpu->kvm, 73))
3178                 vcpu->arch.sie_block->ecb |= ECB_TE;
3179         if (!kvm_is_ucontrol(vcpu->kvm))
3180                 vcpu->arch.sie_block->ecb |= ECB_SPECI;
3181
3182         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3183                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3184         if (test_kvm_facility(vcpu->kvm, 130))
3185                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3186         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3187         if (sclp.has_cei)
3188                 vcpu->arch.sie_block->eca |= ECA_CEI;
3189         if (sclp.has_ib)
3190                 vcpu->arch.sie_block->eca |= ECA_IB;
3191         if (sclp.has_siif)
3192                 vcpu->arch.sie_block->eca |= ECA_SII;
3193         if (sclp.has_sigpif)
3194                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3195         if (test_kvm_facility(vcpu->kvm, 129)) {
3196                 vcpu->arch.sie_block->eca |= ECA_VX;
3197                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3198         }
3199         if (test_kvm_facility(vcpu->kvm, 139))
3200                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3201         if (test_kvm_facility(vcpu->kvm, 156))
3202                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3203         if (vcpu->arch.sie_block->gd) {
3204                 vcpu->arch.sie_block->eca |= ECA_AIV;
3205                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3206                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3207         }
3208         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3209                                         | SDNXC;
3210         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3211
3212         if (sclp.has_kss)
3213                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3214         else
3215                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3216
3217         if (vcpu->kvm->arch.use_cmma) {
3218                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3219                 if (rc)
3220                         return rc;
3221         }
3222         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3223         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3224
3225         vcpu->arch.sie_block->hpid = HPID_KVM;
3226
3227         kvm_s390_vcpu_crypto_setup(vcpu);
3228
3229         mutex_lock(&vcpu->kvm->lock);
3230         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3231                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3232                 if (rc)
3233                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3234         }
3235         mutex_unlock(&vcpu->kvm->lock);
3236
3237         return rc;
3238 }
3239
3240 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3241 {
3242         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3243                 return -EINVAL;
3244         return 0;
3245 }
3246
3247 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3248 {
3249         struct sie_page *sie_page;
3250         int rc;
3251
3252         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3253         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3254         if (!sie_page)
3255                 return -ENOMEM;
3256
3257         vcpu->arch.sie_block = &sie_page->sie_block;
3258         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3259
3260         /* the real guest size will always be smaller than msl */
3261         vcpu->arch.sie_block->mso = 0;
3262         vcpu->arch.sie_block->msl = sclp.hamax;
3263
3264         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3265         spin_lock_init(&vcpu->arch.local_int.lock);
3266         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3267         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3268                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3269         seqcount_init(&vcpu->arch.cputm_seqcount);
3270
3271         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3272         kvm_clear_async_pf_completion_queue(vcpu);
3273         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3274                                     KVM_SYNC_GPRS |
3275                                     KVM_SYNC_ACRS |
3276                                     KVM_SYNC_CRS |
3277                                     KVM_SYNC_ARCH0 |
3278                                     KVM_SYNC_PFAULT |
3279                                     KVM_SYNC_DIAG318;
3280         kvm_s390_set_prefix(vcpu, 0);
3281         if (test_kvm_facility(vcpu->kvm, 64))
3282                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3283         if (test_kvm_facility(vcpu->kvm, 82))
3284                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3285         if (test_kvm_facility(vcpu->kvm, 133))
3286                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3287         if (test_kvm_facility(vcpu->kvm, 156))
3288                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3289         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3290          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3291          */
3292         if (MACHINE_HAS_VX)
3293                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3294         else
3295                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3296
3297         if (kvm_is_ucontrol(vcpu->kvm)) {
3298                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3299                 if (rc)
3300                         goto out_free_sie_block;
3301         }
3302
3303         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3304                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3305         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3306
3307         rc = kvm_s390_vcpu_setup(vcpu);
3308         if (rc)
3309                 goto out_ucontrol_uninit;
3310         return 0;
3311
3312 out_ucontrol_uninit:
3313         if (kvm_is_ucontrol(vcpu->kvm))
3314                 gmap_remove(vcpu->arch.gmap);
3315 out_free_sie_block:
3316         free_page((unsigned long)(vcpu->arch.sie_block));
3317         return rc;
3318 }
3319
3320 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3321 {
3322         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
3323         return kvm_s390_vcpu_has_irq(vcpu, 0);
3324 }
3325
3326 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3327 {
3328         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3329 }
3330
3331 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3332 {
3333         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3334         exit_sie(vcpu);
3335 }
3336
3337 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3338 {
3339         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3340 }
3341
3342 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3343 {
3344         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3345         exit_sie(vcpu);
3346 }
3347
3348 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3349 {
3350         return atomic_read(&vcpu->arch.sie_block->prog20) &
3351                (PROG_BLOCK_SIE | PROG_REQUEST);
3352 }
3353
3354 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3355 {
3356         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3357 }
3358
3359 /*
3360  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3361  * If the CPU is not running (e.g. waiting as idle) the function will
3362  * return immediately. */
3363 void exit_sie(struct kvm_vcpu *vcpu)
3364 {
3365         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3366         kvm_s390_vsie_kick(vcpu);
3367         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3368                 cpu_relax();
3369 }
3370
/*
 * Post request @req for @vcpu and kick the guest cpu out of SIE so that
 * the request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        /* the request is queued before the vcpu is forced out of SIE */
        kvm_make_request(req, vcpu);
        kvm_s390_vcpu_request(vcpu);
}
3377
3378 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3379                               unsigned long end)
3380 {
3381         struct kvm *kvm = gmap->private;
3382         struct kvm_vcpu *vcpu;
3383         unsigned long prefix;
3384         unsigned long i;
3385
3386         if (gmap_is_shadow(gmap))
3387                 return;
3388         if (start >= 1UL << 31)
3389                 /* We are only interested in prefix pages */
3390                 return;
3391         kvm_for_each_vcpu(i, vcpu, kvm) {
3392                 /* match against both prefix pages */
3393                 prefix = kvm_s390_get_prefix(vcpu);
3394                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3395                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3396                                    start, end);
3397                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3398                 }
3399         }
3400 }
3401
3402 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3403 {
3404         /* do not poll with more than halt_poll_max_steal percent of steal time */
3405         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3406             READ_ONCE(halt_poll_max_steal)) {
3407                 vcpu->stat.halt_no_poll_steal++;
3408                 return true;
3409         }
3410         return false;
3411 }
3412
/*
 * kvm common code refers to this function, but never calls it; reaching
 * it triggers BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        BUG();

        return 0;
}
3419
3420 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3421                                            struct kvm_one_reg *reg)
3422 {
3423         int r = -EINVAL;
3424
3425         switch (reg->id) {
3426         case KVM_REG_S390_TODPR:
3427                 r = put_user(vcpu->arch.sie_block->todpr,
3428                              (u32 __user *)reg->addr);
3429                 break;
3430         case KVM_REG_S390_EPOCHDIFF:
3431                 r = put_user(vcpu->arch.sie_block->epoch,
3432                              (u64 __user *)reg->addr);
3433                 break;
3434         case KVM_REG_S390_CPU_TIMER:
3435                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3436                              (u64 __user *)reg->addr);
3437                 break;
3438         case KVM_REG_S390_CLOCK_COMP:
3439                 r = put_user(vcpu->arch.sie_block->ckc,
3440                              (u64 __user *)reg->addr);
3441                 break;
3442         case KVM_REG_S390_PFTOKEN:
3443                 r = put_user(vcpu->arch.pfault_token,
3444                              (u64 __user *)reg->addr);
3445                 break;
3446         case KVM_REG_S390_PFCOMPARE:
3447                 r = put_user(vcpu->arch.pfault_compare,
3448                              (u64 __user *)reg->addr);
3449                 break;
3450         case KVM_REG_S390_PFSELECT:
3451                 r = put_user(vcpu->arch.pfault_select,
3452                              (u64 __user *)reg->addr);
3453                 break;
3454         case KVM_REG_S390_PP:
3455                 r = put_user(vcpu->arch.sie_block->pp,
3456                              (u64 __user *)reg->addr);
3457                 break;
3458         case KVM_REG_S390_GBEA:
3459                 r = put_user(vcpu->arch.sie_block->gbea,
3460                              (u64 __user *)reg->addr);
3461                 break;
3462         default:
3463                 break;
3464         }
3465
3466         return r;
3467 }
3468
3469 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3470                                            struct kvm_one_reg *reg)
3471 {
3472         int r = -EINVAL;
3473         __u64 val;
3474
3475         switch (reg->id) {
3476         case KVM_REG_S390_TODPR:
3477                 r = get_user(vcpu->arch.sie_block->todpr,
3478                              (u32 __user *)reg->addr);
3479                 break;
3480         case KVM_REG_S390_EPOCHDIFF:
3481                 r = get_user(vcpu->arch.sie_block->epoch,
3482                              (u64 __user *)reg->addr);
3483                 break;
3484         case KVM_REG_S390_CPU_TIMER:
3485                 r = get_user(val, (u64 __user *)reg->addr);
3486                 if (!r)
3487                         kvm_s390_set_cpu_timer(vcpu, val);
3488                 break;
3489         case KVM_REG_S390_CLOCK_COMP:
3490                 r = get_user(vcpu->arch.sie_block->ckc,
3491                              (u64 __user *)reg->addr);
3492                 break;
3493         case KVM_REG_S390_PFTOKEN:
3494                 r = get_user(vcpu->arch.pfault_token,
3495                              (u64 __user *)reg->addr);
3496                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3497                         kvm_clear_async_pf_completion_queue(vcpu);
3498                 break;
3499         case KVM_REG_S390_PFCOMPARE:
3500                 r = get_user(vcpu->arch.pfault_compare,
3501                              (u64 __user *)reg->addr);
3502                 break;
3503         case KVM_REG_S390_PFSELECT:
3504                 r = get_user(vcpu->arch.pfault_select,
3505                              (u64 __user *)reg->addr);
3506                 break;
3507         case KVM_REG_S390_PP:
3508                 r = get_user(vcpu->arch.sie_block->pp,
3509                              (u64 __user *)reg->addr);
3510                 break;
3511         case KVM_REG_S390_GBEA:
3512                 r = get_user(vcpu->arch.sie_block->gbea,
3513                              (u64 __user *)reg->addr);
3514                 break;
3515         default:
3516                 break;
3517         }
3518
3519         return r;
3520 }
3521
3522 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3523 {
3524         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3525         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3526         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3527
3528         kvm_clear_async_pf_completion_queue(vcpu);
3529         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3530                 kvm_s390_vcpu_stop(vcpu);
3531         kvm_s390_clear_local_irqs(vcpu);
3532 }
3533
3534 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3535 {
3536         /* Initial reset is a superset of the normal reset */
3537         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3538
3539         /*
3540          * This equals initial cpu reset in pop, but we don't switch to ESA.
3541          * We do not only reset the internal data, but also ...
3542          */
3543         vcpu->arch.sie_block->gpsw.mask = 0;
3544         vcpu->arch.sie_block->gpsw.addr = 0;
3545         kvm_s390_set_prefix(vcpu, 0);
3546         kvm_s390_set_cpu_timer(vcpu, 0);
3547         vcpu->arch.sie_block->ckc = 0;
3548         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3549         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3550         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3551
3552         /* ... the data in sync regs */
3553         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3554         vcpu->run->s.regs.ckc = 0;
3555         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3556         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3557         vcpu->run->psw_addr = 0;
3558         vcpu->run->psw_mask = 0;
3559         vcpu->run->s.regs.todpr = 0;
3560         vcpu->run->s.regs.cputm = 0;
3561         vcpu->run->s.regs.ckc = 0;
3562         vcpu->run->s.regs.pp = 0;
3563         vcpu->run->s.regs.gbea = 1;
3564         vcpu->run->s.regs.fpc = 0;
3565         /*
3566          * Do not reset these registers in the protected case, as some of
3567          * them are overlayed and they are not accessible in this case
3568          * anyway.
3569          */
3570         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3571                 vcpu->arch.sie_block->gbea = 1;
3572                 vcpu->arch.sie_block->pp = 0;
3573                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3574                 vcpu->arch.sie_block->todpr = 0;
3575         }
3576 }
3577
3578 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3579 {
3580         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3581
3582         /* Clear reset is a superset of the initial reset */
3583         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3584
3585         memset(&regs->gprs, 0, sizeof(regs->gprs));
3586         memset(&regs->vrs, 0, sizeof(regs->vrs));
3587         memset(&regs->acrs, 0, sizeof(regs->acrs));
3588         memset(&regs->gscb, 0, sizeof(regs->gscb));
3589
3590         regs->etoken = 0;
3591         regs->etoken_extension = 0;
3592 }
3593
3594 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3595 {
3596         vcpu_load(vcpu);
3597         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3598         vcpu_put(vcpu);
3599         return 0;
3600 }
3601
3602 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3603 {
3604         vcpu_load(vcpu);
3605         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3606         vcpu_put(vcpu);
3607         return 0;
3608 }
3609
3610 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3611                                   struct kvm_sregs *sregs)
3612 {
3613         vcpu_load(vcpu);
3614
3615         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3616         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3617
3618         vcpu_put(vcpu);
3619         return 0;
3620 }
3621
3622 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3623                                   struct kvm_sregs *sregs)
3624 {
3625         vcpu_load(vcpu);
3626
3627         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3628         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3629
3630         vcpu_put(vcpu);
3631         return 0;
3632 }
3633
3634 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3635 {
3636         int ret = 0;
3637
3638         vcpu_load(vcpu);
3639
3640         if (test_fp_ctl(fpu->fpc)) {
3641                 ret = -EINVAL;
3642                 goto out;
3643         }
3644         vcpu->run->s.regs.fpc = fpu->fpc;
3645         if (MACHINE_HAS_VX)
3646                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3647                                  (freg_t *) fpu->fprs);
3648         else
3649                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3650
3651 out:
3652         vcpu_put(vcpu);
3653         return ret;
3654 }
3655
3656 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3657 {
3658         vcpu_load(vcpu);
3659
3660         /* make sure we have the latest values */
3661         save_fpu_regs();
3662         if (MACHINE_HAS_VX)
3663                 convert_vx_to_fp((freg_t *) fpu->fprs,
3664                                  (__vector128 *) vcpu->run->s.regs.vrs);
3665         else
3666                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3667         fpu->fpc = vcpu->run->s.regs.fpc;
3668
3669         vcpu_put(vcpu);
3670         return 0;
3671 }
3672
3673 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3674 {
3675         int rc = 0;
3676
3677         if (!is_vcpu_stopped(vcpu))
3678                 rc = -EBUSY;
3679         else {
3680                 vcpu->run->psw_mask = psw.mask;
3681                 vcpu->run->psw_addr = psw.addr;
3682         }
3683         return rc;
3684 }
3685
3686 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3687                                   struct kvm_translation *tr)
3688 {
3689         return -EINVAL; /* not implemented yet */
3690 }
3691
3692 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3693                               KVM_GUESTDBG_USE_HW_BP | \
3694                               KVM_GUESTDBG_ENABLE)
3695
3696 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3697                                         struct kvm_guest_debug *dbg)
3698 {
3699         int rc = 0;
3700
3701         vcpu_load(vcpu);
3702
3703         vcpu->guest_debug = 0;
3704         kvm_s390_clear_bp_data(vcpu);
3705
3706         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3707                 rc = -EINVAL;
3708                 goto out;
3709         }
3710         if (!sclp.has_gpere) {
3711                 rc = -EINVAL;
3712                 goto out;
3713         }
3714
3715         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3716                 vcpu->guest_debug = dbg->control;
3717                 /* enforce guest PER */
3718                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3719
3720                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3721                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3722         } else {
3723                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3724                 vcpu->arch.guestdbg.last_bp = 0;
3725         }
3726
3727         if (rc) {
3728                 vcpu->guest_debug = 0;
3729                 kvm_s390_clear_bp_data(vcpu);
3730                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3731         }
3732
3733 out:
3734         vcpu_put(vcpu);
3735         return rc;
3736 }
3737
3738 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3739                                     struct kvm_mp_state *mp_state)
3740 {
3741         int ret;
3742
3743         vcpu_load(vcpu);
3744
3745         /* CHECK_STOP and LOAD are not supported yet */
3746         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3747                                       KVM_MP_STATE_OPERATING;
3748
3749         vcpu_put(vcpu);
3750         return ret;
3751 }
3752
3753 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3754                                     struct kvm_mp_state *mp_state)
3755 {
3756         int rc = 0;
3757
3758         vcpu_load(vcpu);
3759
3760         /* user space knows about this interface - let it control the state */
3761         kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);
3762
3763         switch (mp_state->mp_state) {
3764         case KVM_MP_STATE_STOPPED:
3765                 rc = kvm_s390_vcpu_stop(vcpu);
3766                 break;
3767         case KVM_MP_STATE_OPERATING:
3768                 rc = kvm_s390_vcpu_start(vcpu);
3769                 break;
3770         case KVM_MP_STATE_LOAD:
3771                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3772                         rc = -ENXIO;
3773                         break;
3774                 }
3775                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3776                 break;
3777         case KVM_MP_STATE_CHECK_STOP:
3778                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3779         default:
3780                 rc = -ENXIO;
3781         }
3782
3783         vcpu_put(vcpu);
3784         return rc;
3785 }
3786
3787 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3788 {
3789         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3790 }
3791
3792 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3793 {
3794 retry:
3795         kvm_s390_vcpu_request_handled(vcpu);
3796         if (!kvm_request_pending(vcpu))
3797                 return 0;
3798         /*
3799          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3800          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3801          * This ensures that the ipte instruction for this request has
3802          * already finished. We might race against a second unmapper that
3803          * wants to set the blocking bit. Lets just retry the request loop.
3804          */
3805         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3806                 int rc;
3807                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3808                                           kvm_s390_get_prefix(vcpu),
3809                                           PAGE_SIZE * 2, PROT_WRITE);
3810                 if (rc) {
3811                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3812                         return rc;
3813                 }
3814                 goto retry;
3815         }
3816
3817         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3818                 vcpu->arch.sie_block->ihcpu = 0xffff;
3819                 goto retry;
3820         }
3821
3822         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3823                 if (!ibs_enabled(vcpu)) {
3824                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3825                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3826                 }
3827                 goto retry;
3828         }
3829
3830         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3831                 if (ibs_enabled(vcpu)) {
3832                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3833                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3834                 }
3835                 goto retry;
3836         }
3837
3838         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3839                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3840                 goto retry;
3841         }
3842
3843         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3844                 /*
3845                  * Disable CMM virtualization; we will emulate the ESSA
3846                  * instruction manually, in order to provide additional
3847                  * functionalities needed for live migration.
3848                  */
3849                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3850                 goto retry;
3851         }
3852
3853         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3854                 /*
3855                  * Re-enable CMM virtualization if CMMA is available and
3856                  * CMM has been used.
3857                  */
3858                 if ((vcpu->kvm->arch.use_cmma) &&
3859                     (vcpu->kvm->mm->context.uses_cmm))
3860                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3861                 goto retry;
3862         }
3863
3864         /* nothing to do, just clear the request */
3865         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3866         /* we left the vsie handler, nothing to do, just clear the request */
3867         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3868
3869         return 0;
3870 }
3871
3872 void kvm_s390_set_tod_clock(struct kvm *kvm,
3873                             const struct kvm_s390_vm_tod_clock *gtod)
3874 {
3875         struct kvm_vcpu *vcpu;
3876         union tod_clock clk;
3877         unsigned long i;
3878
3879         mutex_lock(&kvm->lock);
3880         preempt_disable();
3881
3882         store_tod_clock_ext(&clk);
3883
3884         kvm->arch.epoch = gtod->tod - clk.tod;
3885         kvm->arch.epdx = 0;
3886         if (test_kvm_facility(kvm, 139)) {
3887                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3888                 if (kvm->arch.epoch > gtod->tod)
3889                         kvm->arch.epdx -= 1;
3890         }
3891
3892         kvm_s390_vcpu_block_all(kvm);
3893         kvm_for_each_vcpu(i, vcpu, kvm) {
3894                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3895                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3896         }
3897
3898         kvm_s390_vcpu_unblock_all(kvm);
3899         preempt_enable();
3900         mutex_unlock(&kvm->lock);
3901 }
3902
3903 /**
3904  * kvm_arch_fault_in_page - fault-in guest page if necessary
3905  * @vcpu: The corresponding virtual cpu
3906  * @gpa: Guest physical address
3907  * @writable: Whether the page should be writable or not
3908  *
3909  * Make sure that a guest page has been faulted-in on the host.
3910  *
3911  * Return: Zero on success, negative error code otherwise.
3912  */
3913 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3914 {
3915         return gmap_fault(vcpu->arch.gmap, gpa,
3916                           writable ? FAULT_FLAG_WRITE : 0);
3917 }
3918
3919 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3920                                       unsigned long token)
3921 {
3922         struct kvm_s390_interrupt inti;
3923         struct kvm_s390_irq irq;
3924
3925         if (start_token) {
3926                 irq.u.ext.ext_params2 = token;
3927                 irq.type = KVM_S390_INT_PFAULT_INIT;
3928                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3929         } else {
3930                 inti.type = KVM_S390_INT_PFAULT_DONE;
3931                 inti.parm64 = token;
3932                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3933         }
3934 }
3935
3936 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3937                                      struct kvm_async_pf *work)
3938 {
3939         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3940         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3941
3942         return true;
3943 }
3944
3945 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3946                                  struct kvm_async_pf *work)
3947 {
3948         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3949         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3950 }
3951
/*
 * Async-pf hook for a page that became ready. Intentionally empty on s390.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
        /* s390 will always inject the page directly */
}
3957
/*
 * Async-pf hook asking whether completed work items may be dequeued for
 * @vcpu. Unconditionally true on s390.
 */
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
        /*
         * s390 will always inject the page directly,
         * but we still want check_async_completion to cleanup
         */
        return true;
}
3966
3967 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3968 {
3969         hva_t hva;
3970         struct kvm_arch_async_pf arch;
3971
3972         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3973                 return false;
3974         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3975             vcpu->arch.pfault_compare)
3976                 return false;
3977         if (psw_extint_disabled(vcpu))
3978                 return false;
3979         if (kvm_s390_vcpu_has_irq(vcpu, 0))
3980                 return false;
3981         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
3982                 return false;
3983         if (!vcpu->arch.gmap->pfault_enabled)
3984                 return false;
3985
3986         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3987         hva += current->thread.gmap_addr & ~PAGE_MASK;
3988         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3989                 return false;
3990
3991         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3992 }
3993
3994 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3995 {
3996         int rc, cpuflags;
3997
3998         /*
3999          * On s390 notifications for arriving pages will be delivered directly
4000          * to the guest but the house keeping for completed pfaults is
4001          * handled outside the worker.
4002          */
4003         kvm_check_async_pf_completion(vcpu);
4004
4005         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4006         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4007
4008         if (need_resched())
4009                 schedule();
4010
4011         if (!kvm_is_ucontrol(vcpu->kvm)) {
4012                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4013                 if (rc)
4014                         return rc;
4015         }
4016
4017         rc = kvm_s390_handle_requests(vcpu);
4018         if (rc)
4019                 return rc;
4020
4021         if (guestdbg_enabled(vcpu)) {
4022                 kvm_s390_backup_guest_per_regs(vcpu);
4023                 kvm_s390_patch_guest_per_regs(vcpu);
4024         }
4025
4026         clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
4027
4028         vcpu->arch.sie_block->icptcode = 0;
4029         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4030         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4031         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4032
4033         return 0;
4034 }
4035
4036 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4037 {
4038         struct kvm_s390_pgm_info pgm_info = {
4039                 .code = PGM_ADDRESSING,
4040         };
4041         u8 opcode, ilen;
4042         int rc;
4043
4044         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4045         trace_kvm_s390_sie_fault(vcpu);
4046
4047         /*
4048          * We want to inject an addressing exception, which is defined as a
4049          * suppressing or terminating exception. However, since we came here
4050          * by a DAT access exception, the PSW still points to the faulting
4051          * instruction since DAT exceptions are nullifying. So we've got
4052          * to look up the current opcode to get the length of the instruction
4053          * to be able to forward the PSW.
4054          */
4055         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4056         ilen = insn_length(opcode);
4057         if (rc < 0) {
4058                 return rc;
4059         } else if (rc) {
4060                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4061                  * Forward by arbitrary ilc, injection will take care of
4062                  * nullification if necessary.
4063                  */
4064                 pgm_info = vcpu->arch.pgm;
4065                 ilen = 4;
4066         }
4067         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4068         kvm_s390_forward_psw(vcpu, ilen);
4069         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4070 }
4071
4072 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4073 {
4074         struct mcck_volatile_info *mcck_info;
4075         struct sie_page *sie_page;
4076
4077         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4078                    vcpu->arch.sie_block->icptcode);
4079         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4080
4081         if (guestdbg_enabled(vcpu))
4082                 kvm_s390_restore_guest_per_regs(vcpu);
4083
4084         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4085         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4086
4087         if (exit_reason == -EINTR) {
4088                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4089                 sie_page = container_of(vcpu->arch.sie_block,
4090                                         struct sie_page, sie_block);
4091                 mcck_info = &sie_page->mcck_info;
4092                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4093                 return 0;
4094         }
4095
4096         if (vcpu->arch.sie_block->icptcode > 0) {
4097                 int rc = kvm_handle_sie_intercept(vcpu);
4098
4099                 if (rc != -EOPNOTSUPP)
4100                         return rc;
4101                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4102                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4103                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4104                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4105                 return -EREMOTE;
4106         } else if (exit_reason != -EFAULT) {
4107                 vcpu->stat.exit_null++;
4108                 return 0;
4109         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4110                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4111                 vcpu->run->s390_ucontrol.trans_exc_code =
4112                                                 current->thread.gmap_addr;
4113                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4114                 return -EREMOTE;
4115         } else if (current->thread.gmap_pfault) {
4116                 trace_kvm_s390_major_guest_pfault(vcpu);
4117                 current->thread.gmap_pfault = 0;
4118                 if (kvm_arch_setup_async_pf(vcpu))
4119                         return 0;
4120                 vcpu->stat.pfault_sync++;
4121                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4122         }
4123         return vcpu_post_run_fault_in_sie(vcpu);
4124 }
4125
/* PSW mask bits for external, I/O and machine check interruptions */
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
/*
 * Inner run loop: repeatedly prepare the vcpu, enter the guest via sie64a()
 * and evaluate the exit.
 *
 * The loop ends when vcpu_pre_run() or vcpu_post_run() returns non-zero, a
 * signal is pending for the current task, or a guest debug exit is pending.
 *
 * Returns the last value of rc (0 if the loop was left because of a signal
 * or a pending debug exit).
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
        int rc, exit_reason;
        /* this cast relies on the SIE block being the first member of its sie_page */
        struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

        /*
         * We try to hold kvm->srcu during most of vcpu_run (except when run-
         * ning the guest), so that memslots (and other stuff) are protected
         */
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

        do {
                rc = vcpu_pre_run(vcpu);
                if (rc)
                        break;

                /* srcu is dropped for the time the guest is running */
                srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
                /*
                 * As PF_VCPU will be used in fault handler, between
                 * guest_enter and guest_exit should be no uaccess.
                 */
                local_irq_disable();
                guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
                /* protected vcpus take their gprs from the sie_page copy */
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        memcpy(sie_page->pv_grregs,
                               vcpu->run->s.regs.gprs,
                               sizeof(sie_page->pv_grregs));
                }
                if (test_cpu_flag(CIF_FPU))
                        load_fpu_regs();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
                        /* copy the gprs back out of the sie_page */
                        memcpy(vcpu->run->s.regs.gprs,
                               sie_page->pv_grregs,
                               sizeof(sie_page->pv_grregs));
                        /*
                         * We're not allowed to inject interrupts on intercepts
                         * that leave the guest state in an "in-between" state
                         * where the next SIE entry will do a continuation.
                         * Fence interrupts in our "internal" PSW.
                         */
                        if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
                            vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
                                vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
                        }
                }
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
                guest_exit_irqoff();
                local_irq_enable();
                /* back in host context: re-take srcu before touching memslots */
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
}
4188
4189 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4190 {
4191         struct kvm_run *kvm_run = vcpu->run;
4192         struct runtime_instr_cb *riccb;
4193         struct gs_cb *gscb;
4194
4195         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4196         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4197         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4198         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4199         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4200                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4201                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4202                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4203         }
4204         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4205                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4206                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4207                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4208                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4209                         kvm_clear_async_pf_completion_queue(vcpu);
4210         }
4211         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4212                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4213                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4214                 VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
4215         }
4216         /*
4217          * If userspace sets the riccb (e.g. after migration) to a valid state,
4218          * we should enable RI here instead of doing the lazy enablement.
4219          */
4220         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4221             test_kvm_facility(vcpu->kvm, 64) &&
4222             riccb->v &&
4223             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4224                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4225                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4226         }
4227         /*
4228          * If userspace sets the gscb (e.g. after migration) to non-zero,
4229          * we should enable GS here instead of doing the lazy enablement.
4230          */
4231         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4232             test_kvm_facility(vcpu->kvm, 133) &&
4233             gscb->gssm &&
4234             !vcpu->arch.gs_enabled) {
4235                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4236                 vcpu->arch.sie_block->ecb |= ECB_GS;
4237                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4238                 vcpu->arch.gs_enabled = 1;
4239         }
4240         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4241             test_kvm_facility(vcpu->kvm, 82)) {
4242                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4243                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4244         }
4245         if (MACHINE_HAS_GS) {
4246                 preempt_disable();
4247                 __ctl_set_bit(2, 4);
4248                 if (current->thread.gs_cb) {
4249                         vcpu->arch.host_gscb = current->thread.gs_cb;
4250                         save_gs_cb(vcpu->arch.host_gscb);
4251                 }
4252                 if (vcpu->arch.gs_enabled) {
4253                         current->thread.gs_cb = (struct gs_cb *)
4254                                                 &vcpu->run->s.regs.gscb;
4255                         restore_gs_cb(current->thread.gs_cb);
4256                 }
4257                 preempt_enable();
4258         }
4259         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4260 }
4261
4262 static void sync_regs(struct kvm_vcpu *vcpu)
4263 {
4264         struct kvm_run *kvm_run = vcpu->run;
4265
4266         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4267                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4268         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4269                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4270                 /* some control register changes require a tlb flush */
4271                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4272         }
4273         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4274                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4275                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4276         }
4277         save_access_regs(vcpu->arch.host_acrs);
4278         restore_access_regs(vcpu->run->s.regs.acrs);
4279         /* save host (userspace) fprs/vrs */
4280         save_fpu_regs();
4281         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4282         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4283         if (MACHINE_HAS_VX)
4284                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4285         else
4286                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4287         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4288         if (test_fp_ctl(current->thread.fpu.fpc))
4289                 /* User space provided an invalid FPC, let's clear it */
4290                 current->thread.fpu.fpc = 0;
4291
4292         /* Sync fmt2 only data */
4293         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4294                 sync_regs_fmt2(vcpu);
4295         } else {
4296                 /*
4297                  * In several places we have to modify our internal view to
4298                  * not do things that are disallowed by the ultravisor. For
4299                  * example we must not inject interrupts after specific exits
4300                  * (e.g. 112 prefix page not secure). We do this by turning
4301                  * off the machine check, external and I/O interrupt bits
4302                  * of our PSW copy. To avoid getting validity intercepts, we
4303                  * do only accept the condition code from userspace.
4304                  */
4305                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4306                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4307                                                    PSW_MASK_CC;
4308         }
4309
4310         kvm_run->kvm_dirty_regs = 0;
4311 }
4312
4313 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4314 {
4315         struct kvm_run *kvm_run = vcpu->run;
4316
4317         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4318         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4319         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4320         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4321         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4322         if (MACHINE_HAS_GS) {
4323                 preempt_disable();
4324                 __ctl_set_bit(2, 4);
4325                 if (vcpu->arch.gs_enabled)
4326                         save_gs_cb(current->thread.gs_cb);
4327                 current->thread.gs_cb = vcpu->arch.host_gscb;
4328                 restore_gs_cb(vcpu->arch.host_gscb);
4329                 if (!vcpu->arch.host_gscb)
4330                         __ctl_clear_bit(2, 4);
4331                 vcpu->arch.host_gscb = NULL;
4332                 preempt_enable();
4333         }
4334         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4335 }
4336
4337 static void store_regs(struct kvm_vcpu *vcpu)
4338 {
4339         struct kvm_run *kvm_run = vcpu->run;
4340
4341         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4342         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4343         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4344         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4345         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4346         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4347         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4348         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4349         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4350         save_access_regs(vcpu->run->s.regs.acrs);
4351         restore_access_regs(vcpu->arch.host_acrs);
4352         /* Save guest register state */
4353         save_fpu_regs();
4354         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4355         /* Restore will be done lazily at return */
4356         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4357         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4358         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4359                 store_regs_fmt2(vcpu);
4360 }
4361
4362 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4363 {
4364         struct kvm_run *kvm_run = vcpu->run;
4365         int rc;
4366
4367         if (kvm_run->immediate_exit)
4368                 return -EINTR;
4369
4370         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4371             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4372                 return -EINVAL;
4373
4374         vcpu_load(vcpu);
4375
4376         if (guestdbg_exit_pending(vcpu)) {
4377                 kvm_s390_prepare_debug_exit(vcpu);
4378                 rc = 0;
4379                 goto out;
4380         }
4381
4382         kvm_sigset_activate(vcpu);
4383
4384         /*
4385          * no need to check the return value of vcpu_start as it can only have
4386          * an error for protvirt, but protvirt means user cpu state
4387          */
4388         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4389                 kvm_s390_vcpu_start(vcpu);
4390         } else if (is_vcpu_stopped(vcpu)) {
4391                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4392                                    vcpu->vcpu_id);
4393                 rc = -EINVAL;
4394                 goto out;
4395         }
4396
4397         sync_regs(vcpu);
4398         enable_cpu_timer_accounting(vcpu);
4399
4400         might_fault();
4401         rc = __vcpu_run(vcpu);
4402
4403         if (signal_pending(current) && !rc) {
4404                 kvm_run->exit_reason = KVM_EXIT_INTR;
4405                 rc = -EINTR;
4406         }
4407
4408         if (guestdbg_exit_pending(vcpu) && !rc)  {
4409                 kvm_s390_prepare_debug_exit(vcpu);
4410                 rc = 0;
4411         }
4412
4413         if (rc == -EREMOTE) {
4414                 /* userspace support is needed, kvm_run has been prepared */
4415                 rc = 0;
4416         }
4417
4418         disable_cpu_timer_accounting(vcpu);
4419         store_regs(vcpu);
4420
4421         kvm_sigset_deactivate(vcpu);
4422
4423         vcpu->stat.exit_userspace++;
4424 out:
4425         vcpu_put(vcpu);
4426         return rc;
4427 }
4428
4429 /*
4430  * store status at address
4431  * we use have two special cases:
4432  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4433  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4434  */
4435 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4436 {
4437         unsigned char archmode = 1;
4438         freg_t fprs[NUM_FPRS];
4439         unsigned int px;
4440         u64 clkcomp, cputm;
4441         int rc;
4442
4443         px = kvm_s390_get_prefix(vcpu);
4444         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4445                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4446                         return -EFAULT;
4447                 gpa = 0;
4448         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4449                 if (write_guest_real(vcpu, 163, &archmode, 1))
4450                         return -EFAULT;
4451                 gpa = px;
4452         } else
4453                 gpa -= __LC_FPREGS_SAVE_AREA;
4454
4455         /* manually convert vector registers if necessary */
4456         if (MACHINE_HAS_VX) {
4457                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4458                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4459                                      fprs, 128);
4460         } else {
4461                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4462                                      vcpu->run->s.regs.fprs, 128);
4463         }
4464         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4465                               vcpu->run->s.regs.gprs, 128);
4466         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4467                               &vcpu->arch.sie_block->gpsw, 16);
4468         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4469                               &px, 4);
4470         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4471                               &vcpu->run->s.regs.fpc, 4);
4472         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4473                               &vcpu->arch.sie_block->todpr, 4);
4474         cputm = kvm_s390_get_cpu_timer(vcpu);
4475         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4476                               &cputm, 8);
4477         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4478         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4479                               &clkcomp, 8);
4480         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4481                               &vcpu->run->s.regs.acrs, 64);
4482         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4483                               &vcpu->arch.sie_block->gcr, 128);
4484         return rc ? -EFAULT : 0;
4485 }
4486
4487 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4488 {
4489         /*
4490          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4491          * switch in the run ioctl. Let's update our copies before we save
4492          * it into the save area
4493          */
4494         save_fpu_regs();
4495         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4496         save_access_regs(vcpu->run->s.regs.acrs);
4497
4498         return kvm_s390_store_status_unloaded(vcpu, addr);
4499 }
4500
/*
 * Request that IBS gets switched off for @vcpu.
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
        /* consume an ENABLE request that may still be outstanding */
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
        /* NOTE(review): presumably waits until the vcpu has seen the request - confirm in kvm_s390_sync_request() */
        kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
4506
4507 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4508 {
4509         unsigned long i;
4510         struct kvm_vcpu *vcpu;
4511
4512         kvm_for_each_vcpu(i, vcpu, kvm) {
4513                 __disable_ibs_on_vcpu(vcpu);
4514         }
4515 }
4516
4517 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4518 {
4519         if (!sclp.has_ibs)
4520                 return;
4521         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4522         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4523 }
4524
4525 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4526 {
4527         int i, online_vcpus, r = 0, started_vcpus = 0;
4528
4529         if (!is_vcpu_stopped(vcpu))
4530                 return 0;
4531
4532         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4533         /* Only one cpu at a time may enter/leave the STOPPED state. */
4534         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4535         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4536
4537         /* Let's tell the UV that we want to change into the operating state */
4538         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4539                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4540                 if (r) {
4541                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4542                         return r;
4543                 }
4544         }
4545
4546         for (i = 0; i < online_vcpus; i++) {
4547                 if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
4548                         started_vcpus++;
4549         }
4550
4551         if (started_vcpus == 0) {
4552                 /* we're the only active VCPU -> speed it up */
4553                 __enable_ibs_on_vcpu(vcpu);
4554         } else if (started_vcpus == 1) {
4555                 /*
4556                  * As we are starting a second VCPU, we have to disable
4557                  * the IBS facility on all VCPUs to remove potentially
4558                  * outstanding ENABLE requests.
4559                  */
4560                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4561         }
4562
4563         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4564         /*
4565          * The real PSW might have changed due to a RESTART interpreted by the
4566          * ultravisor. We block all interrupts and let the next sie exit
4567          * refresh our view.
4568          */
4569         if (kvm_s390_pv_cpu_is_protected(vcpu))
4570                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4571         /*
4572          * Another VCPU might have used IBS while we were offline.
4573          * Let's play safe and flush the VCPU at startup.
4574          */
4575         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4576         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4577         return 0;
4578 }
4579
4580 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4581 {
4582         int i, online_vcpus, r = 0, started_vcpus = 0;
4583         struct kvm_vcpu *started_vcpu = NULL;
4584
4585         if (is_vcpu_stopped(vcpu))
4586                 return 0;
4587
4588         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4589         /* Only one cpu at a time may enter/leave the STOPPED state. */
4590         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4591         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4592
4593         /* Let's tell the UV that we want to change into the stopped state */
4594         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4595                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4596                 if (r) {
4597                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4598                         return r;
4599                 }
4600         }
4601
4602         /*
4603          * Set the VCPU to STOPPED and THEN clear the interrupt flag,
4604          * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
4605          * have been fully processed. This will ensure that the VCPU
4606          * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
4607          */
4608         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4609         kvm_s390_clear_stop_irq(vcpu);
4610
4611         __disable_ibs_on_vcpu(vcpu);
4612
4613         for (i = 0; i < online_vcpus; i++) {
4614                 struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
4615
4616                 if (!is_vcpu_stopped(tmp)) {
4617                         started_vcpus++;
4618                         started_vcpu = tmp;
4619                 }
4620         }
4621
4622         if (started_vcpus == 1) {
4623                 /*
4624                  * As we only have one VCPU left, we want to enable the
4625                  * IBS facility for that VCPU to speed it up.
4626                  */
4627                 __enable_ibs_on_vcpu(started_vcpu);
4628         }
4629
4630         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4631         return 0;
4632 }
4633
4634 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4635                                      struct kvm_enable_cap *cap)
4636 {
4637         int r;
4638
4639         if (cap->flags)
4640                 return -EINVAL;
4641
4642         switch (cap->cap) {
4643         case KVM_CAP_S390_CSS_SUPPORT:
4644                 if (!vcpu->kvm->arch.css_support) {
4645                         vcpu->kvm->arch.css_support = 1;
4646                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4647                         trace_kvm_s390_enable_css(vcpu->kvm);
4648                 }
4649                 r = 0;
4650                 break;
4651         default:
4652                 r = -EINVAL;
4653                 break;
4654         }
4655         return r;
4656 }
4657
4658 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4659                                    struct kvm_s390_mem_op *mop)
4660 {
4661         void __user *uaddr = (void __user *)mop->buf;
4662         int r = 0;
4663
4664         if (mop->flags || !mop->size)
4665                 return -EINVAL;
4666         if (mop->size + mop->sida_offset < mop->size)
4667                 return -EINVAL;
4668         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4669                 return -E2BIG;
4670
4671         switch (mop->op) {
4672         case KVM_S390_MEMOP_SIDA_READ:
4673                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4674                                  mop->sida_offset), mop->size))
4675                         r = -EFAULT;
4676
4677                 break;
4678         case KVM_S390_MEMOP_SIDA_WRITE:
4679                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4680                                    mop->sida_offset), uaddr, mop->size))
4681                         r = -EFAULT;
4682                 break;
4683         }
4684         return r;
4685 }
4686 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4687                                   struct kvm_s390_mem_op *mop)
4688 {
4689         void __user *uaddr = (void __user *)mop->buf;
4690         void *tmpbuf = NULL;
4691         int r = 0;
4692         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4693                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4694
4695         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4696                 return -EINVAL;
4697
4698         if (mop->size > MEM_OP_MAX_SIZE)
4699                 return -E2BIG;
4700
4701         if (kvm_s390_pv_cpu_is_protected(vcpu))
4702                 return -EINVAL;
4703
4704         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4705                 tmpbuf = vmalloc(mop->size);
4706                 if (!tmpbuf)
4707                         return -ENOMEM;
4708         }
4709
4710         switch (mop->op) {
4711         case KVM_S390_MEMOP_LOGICAL_READ:
4712                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4713                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4714                                             mop->size, GACC_FETCH);
4715                         break;
4716                 }
4717                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4718                 if (r == 0) {
4719                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4720                                 r = -EFAULT;
4721                 }
4722                 break;
4723         case KVM_S390_MEMOP_LOGICAL_WRITE:
4724                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4725                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4726                                             mop->size, GACC_STORE);
4727                         break;
4728                 }
4729                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4730                         r = -EFAULT;
4731                         break;
4732                 }
4733                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4734                 break;
4735         }
4736
4737         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4738                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4739
4740         vfree(tmpbuf);
4741         return r;
4742 }
4743
4744 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4745                                       struct kvm_s390_mem_op *mop)
4746 {
4747         int r, srcu_idx;
4748
4749         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4750
4751         switch (mop->op) {
4752         case KVM_S390_MEMOP_LOGICAL_READ:
4753         case KVM_S390_MEMOP_LOGICAL_WRITE:
4754                 r = kvm_s390_guest_mem_op(vcpu, mop);
4755                 break;
4756         case KVM_S390_MEMOP_SIDA_READ:
4757         case KVM_S390_MEMOP_SIDA_WRITE:
4758                 /* we are locked against sida going away by the vcpu->mutex */
4759                 r = kvm_s390_guest_sida_op(vcpu, mop);
4760                 break;
4761         default:
4762                 r = -EINVAL;
4763         }
4764
4765         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4766         return r;
4767 }
4768
4769 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4770                                unsigned int ioctl, unsigned long arg)
4771 {
4772         struct kvm_vcpu *vcpu = filp->private_data;
4773         void __user *argp = (void __user *)arg;
4774
4775         switch (ioctl) {
4776         case KVM_S390_IRQ: {
4777                 struct kvm_s390_irq s390irq;
4778
4779                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4780                         return -EFAULT;
4781                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4782         }
4783         case KVM_S390_INTERRUPT: {
4784                 struct kvm_s390_interrupt s390int;
4785                 struct kvm_s390_irq s390irq = {};
4786
4787                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4788                         return -EFAULT;
4789                 if (s390int_to_s390irq(&s390int, &s390irq))
4790                         return -EINVAL;
4791                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4792         }
4793         }
4794         return -ENOIOCTLCMD;
4795 }
4796
4797 long kvm_arch_vcpu_ioctl(struct file *filp,
4798                          unsigned int ioctl, unsigned long arg)
4799 {
4800         struct kvm_vcpu *vcpu = filp->private_data;
4801         void __user *argp = (void __user *)arg;
4802         int idx;
4803         long r;
4804         u16 rc, rrc;
4805
4806         vcpu_load(vcpu);
4807
4808         switch (ioctl) {
4809         case KVM_S390_STORE_STATUS:
4810                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4811                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4812                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4813                 break;
4814         case KVM_S390_SET_INITIAL_PSW: {
4815                 psw_t psw;
4816
4817                 r = -EFAULT;
4818                 if (copy_from_user(&psw, argp, sizeof(psw)))
4819                         break;
4820                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4821                 break;
4822         }
4823         case KVM_S390_CLEAR_RESET:
4824                 r = 0;
4825                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4826                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4827                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4828                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4829                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4830                                    rc, rrc);
4831                 }
4832                 break;
4833         case KVM_S390_INITIAL_RESET:
4834                 r = 0;
4835                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4836                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4837                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4838                                           UVC_CMD_CPU_RESET_INITIAL,
4839                                           &rc, &rrc);
4840                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4841                                    rc, rrc);
4842                 }
4843                 break;
4844         case KVM_S390_NORMAL_RESET:
4845                 r = 0;
4846                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4847                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4848                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4849                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4850                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4851                                    rc, rrc);
4852                 }
4853                 break;
4854         case KVM_SET_ONE_REG:
4855         case KVM_GET_ONE_REG: {
4856                 struct kvm_one_reg reg;
4857                 r = -EINVAL;
4858                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4859                         break;
4860                 r = -EFAULT;
4861                 if (copy_from_user(&reg, argp, sizeof(reg)))
4862                         break;
4863                 if (ioctl == KVM_SET_ONE_REG)
4864                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4865                 else
4866                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4867                 break;
4868         }
4869 #ifdef CONFIG_KVM_S390_UCONTROL
4870         case KVM_S390_UCAS_MAP: {
4871                 struct kvm_s390_ucas_mapping ucasmap;
4872
4873                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4874                         r = -EFAULT;
4875                         break;
4876                 }
4877
4878                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4879                         r = -EINVAL;
4880                         break;
4881                 }
4882
4883                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4884                                      ucasmap.vcpu_addr, ucasmap.length);
4885                 break;
4886         }
4887         case KVM_S390_UCAS_UNMAP: {
4888                 struct kvm_s390_ucas_mapping ucasmap;
4889
4890                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4891                         r = -EFAULT;
4892                         break;
4893                 }
4894
4895                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4896                         r = -EINVAL;
4897                         break;
4898                 }
4899
4900                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4901                         ucasmap.length);
4902                 break;
4903         }
4904 #endif
4905         case KVM_S390_VCPU_FAULT: {
4906                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4907                 break;
4908         }
4909         case KVM_ENABLE_CAP:
4910         {
4911                 struct kvm_enable_cap cap;
4912                 r = -EFAULT;
4913                 if (copy_from_user(&cap, argp, sizeof(cap)))
4914                         break;
4915                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4916                 break;
4917         }
4918         case KVM_S390_MEM_OP: {
4919                 struct kvm_s390_mem_op mem_op;
4920
4921                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4922                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4923                 else
4924                         r = -EFAULT;
4925                 break;
4926         }
4927         case KVM_S390_SET_IRQ_STATE: {
4928                 struct kvm_s390_irq_state irq_state;
4929
4930                 r = -EFAULT;
4931                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4932                         break;
4933                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4934                     irq_state.len == 0 ||
4935                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4936                         r = -EINVAL;
4937                         break;
4938                 }
4939                 /* do not use irq_state.flags, it will break old QEMUs */
4940                 r = kvm_s390_set_irq_state(vcpu,
4941                                            (void __user *) irq_state.buf,
4942                                            irq_state.len);
4943                 break;
4944         }
4945         case KVM_S390_GET_IRQ_STATE: {
4946                 struct kvm_s390_irq_state irq_state;
4947
4948                 r = -EFAULT;
4949                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4950                         break;
4951                 if (irq_state.len == 0) {
4952                         r = -EINVAL;
4953                         break;
4954                 }
4955                 /* do not use irq_state.flags, it will break old QEMUs */
4956                 r = kvm_s390_get_irq_state(vcpu,
4957                                            (__u8 __user *)  irq_state.buf,
4958                                            irq_state.len);
4959                 break;
4960         }
4961         default:
4962                 r = -ENOTTY;
4963         }
4964
4965         vcpu_put(vcpu);
4966         return r;
4967 }
4968
4969 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
4970 {
4971 #ifdef CONFIG_KVM_S390_UCONTROL
4972         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
4973                  && (kvm_is_ucontrol(vcpu->kvm))) {
4974                 vmf->page = virt_to_page(vcpu->arch.sie_block);
4975                 get_page(vmf->page);
4976                 return 0;
4977         }
4978 #endif
4979         return VM_FAULT_SIGBUS;
4980 }
4981
4982 /* Section: memory related */
4983 int kvm_arch_prepare_memory_region(struct kvm *kvm,
4984                                    const struct kvm_memory_slot *old,
4985                                    struct kvm_memory_slot *new,
4986                                    enum kvm_mr_change change)
4987 {
4988         gpa_t size;
4989
4990         /* When we are protected, we should not change the memory slots */
4991         if (kvm_s390_pv_get_handle(kvm))
4992                 return -EINVAL;
4993
4994         if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
4995                 return 0;
4996
4997         /* A few sanity checks. We can have memory slots which have to be
4998            located/ended at a segment boundary (1MB). The memory in userland is
4999            ok to be fragmented into various different vmas. It is okay to mmap()
5000            and munmap() stuff in this slot after doing this call at any time */
5001
5002         if (new->userspace_addr & 0xffffful)
5003                 return -EINVAL;
5004
5005         size = new->npages * PAGE_SIZE;
5006         if (size & 0xffffful)
5007                 return -EINVAL;
5008
5009         if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
5010                 return -EINVAL;
5011
5012         return 0;
5013 }
5014
5015 void kvm_arch_commit_memory_region(struct kvm *kvm,
5016                                 struct kvm_memory_slot *old,
5017                                 const struct kvm_memory_slot *new,
5018                                 enum kvm_mr_change change)
5019 {
5020         int rc = 0;
5021
5022         switch (change) {
5023         case KVM_MR_DELETE:
5024                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5025                                         old->npages * PAGE_SIZE);
5026                 break;
5027         case KVM_MR_MOVE:
5028                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5029                                         old->npages * PAGE_SIZE);
5030                 if (rc)
5031                         break;
5032                 fallthrough;
5033         case KVM_MR_CREATE:
5034                 rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
5035                                       new->base_gfn * PAGE_SIZE,
5036                                       new->npages * PAGE_SIZE);
5037                 break;
5038         case KVM_MR_FLAGS_ONLY:
5039                 break;
5040         default:
5041                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5042         }
5043         if (rc)
5044                 pr_warn("failed to commit memory region\n");
5045         return;
5046 }
5047
5048 static inline unsigned long nonhyp_mask(int i)
5049 {
5050         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5051
5052         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5053 }
5054
5055 static int __init kvm_s390_init(void)
5056 {
5057         int i;
5058
5059         if (!sclp.has_sief2) {
5060                 pr_info("SIE is not available\n");
5061                 return -ENODEV;
5062         }
5063
5064         if (nested && hpage) {
5065                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5066                 return -EINVAL;
5067         }
5068
5069         for (i = 0; i < 16; i++)
5070                 kvm_s390_fac_base[i] |=
5071                         stfle_fac_list[i] & nonhyp_mask(i);
5072
5073         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5074 }
5075
/* Module exit point (registered via module_exit() below): calls kvm_exit(). */
static void __exit kvm_s390_exit(void)
{
        kvm_exit();
}
5080
/* Hook kvm_s390_init()/kvm_s390_exit() up as the module's init and exit. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");