// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 *            Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
	KVM_GENERIC_VM_STATS(),
	STATS_DESC_COUNTER(VM, inject_io),
	STATS_DESC_COUNTER(VM, inject_float_mchk),
	STATS_DESC_COUNTER(VM, inject_pfault_done),
	STATS_DESC_COUNTER(VM, inject_service_signal),
	STATS_DESC_COUNTER(VM, inject_virtio)
};

const struct kvm_stats_header kvm_vm_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
	KVM_GENERIC_VCPU_STATS(),
	STATS_DESC_COUNTER(VCPU, exit_userspace),
	STATS_DESC_COUNTER(VCPU, exit_null),
	STATS_DESC_COUNTER(VCPU, exit_external_request),
	STATS_DESC_COUNTER(VCPU, exit_io_request),
	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
	STATS_DESC_COUNTER(VCPU, exit_stop_request),
	STATS_DESC_COUNTER(VCPU, exit_validity),
	STATS_DESC_COUNTER(VCPU, exit_instruction),
	STATS_DESC_COUNTER(VCPU, exit_pei),
	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
	STATS_DESC_COUNTER(VCPU, instruction_lctl),
	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
	STATS_DESC_COUNTER(VCPU, instruction_stctl),
	STATS_DESC_COUNTER(VCPU, instruction_stctg),
	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
	STATS_DESC_COUNTER(VCPU, deliver_ckc),
	STATS_DESC_COUNTER(VCPU, deliver_cputm),
	STATS_DESC_COUNTER(VCPU, deliver_external_call),
	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
	STATS_DESC_COUNTER(VCPU, deliver_virtio),
	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
	STATS_DESC_COUNTER(VCPU, deliver_program),
	STATS_DESC_COUNTER(VCPU, deliver_io),
	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
	STATS_DESC_COUNTER(VCPU, exit_wait_state),
	STATS_DESC_COUNTER(VCPU, inject_ckc),
	STATS_DESC_COUNTER(VCPU, inject_cputm),
	STATS_DESC_COUNTER(VCPU, inject_external_call),
	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
	STATS_DESC_COUNTER(VCPU, inject_mchk),
	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
	STATS_DESC_COUNTER(VCPU, inject_program),
	STATS_DESC_COUNTER(VCPU, inject_restart),
	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
	STATS_DESC_COUNTER(VCPU, instruction_epsw),
	STATS_DESC_COUNTER(VCPU, instruction_gs),
	STATS_DESC_COUNTER(VCPU, instruction_io_other),
	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
	STATS_DESC_COUNTER(VCPU, instruction_ptff),
	STATS_DESC_COUNTER(VCPU, instruction_sck),
	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
	STATS_DESC_COUNTER(VCPU, instruction_stidp),
	STATS_DESC_COUNTER(VCPU, instruction_spx),
	STATS_DESC_COUNTER(VCPU, instruction_stpx),
	STATS_DESC_COUNTER(VCPU, instruction_stap),
	STATS_DESC_COUNTER(VCPU, instruction_iske),
	STATS_DESC_COUNTER(VCPU, instruction_ri),
	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
	STATS_DESC_COUNTER(VCPU, instruction_sske),
	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
	STATS_DESC_COUNTER(VCPU, instruction_stsi),
	STATS_DESC_COUNTER(VCPU, instruction_stfl),
	STATS_DESC_COUNTER(VCPU, instruction_tb),
	STATS_DESC_COUNTER(VCPU, instruction_tpi),
	STATS_DESC_COUNTER(VCPU, instruction_tprot),
	STATS_DESC_COUNTER(VCPU, instruction_tsch),
	STATS_DESC_COUNTER(VCPU, instruction_sie),
	STATS_DESC_COUNTER(VCPU, instruction_essa),
	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
	STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
	STATS_DESC_COUNTER(VCPU, diag_9c_forward),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
	STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
	STATS_DESC_COUNTER(VCPU, pfault_sync)
};

const struct kvm_stats_header kvm_vcpu_stats_header = {
	.name_size = KVM_STATS_NAME_SIZE,
	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
	.id_offset = sizeof(struct kvm_stats_header),
	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
		       sizeof(kvm_vcpu_stats_desc),
};

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");

/*
 * For now we handle at most 16 double words as this is what the s390 base
 * kernel handles and stores in the prefix page. If we ever need to go beyond
 * this, this requires changes to code, but the external uapi can stay.
 */
#define SIZE_INTERNAL 16

/*
 * Base feature mask that defines default mask for facilities. Consists of the
 * defines in FACILITIES_KVM and the non-hypervisor managed bits.
 */
static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
/*
 * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
 * and defines the facilities that can be enabled via a cpu model.
 */
static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };

static unsigned long kvm_s390_fac_size(void)
{
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
	BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
		     sizeof(stfle_fac_list));

	return SIZE_INTERNAL;
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
debug_info_t *kvm_s390_dbf_uv;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

int kvm_arch_check_processor_compat(void *opaque)
{
	return 0;
}

/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end);
static int sca_switch_to_extended(struct kvm *kvm);

static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
	u8 delta_idx = 0;

	/*
	 * The TOD jumps by delta, we have to compensate this by adding
	 * -delta to the epoch.
	 */
	delta = -delta;

	/* sign-extension - we're adding to signed values below */
	if ((s64)delta < 0)
		delta_idx = -1;

	scb->epoch += delta;
	if (scb->ecd & ECD_MEF) {
		scb->epdx += delta_idx;
		if (scb->epoch < delta)
			scb->epdx += 1;
	}
}

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	unsigned long i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm_for_each_vcpu(i, vcpu, kvm) {
			kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
			if (i == 0) {
				kvm->arch.epoch = vcpu->arch.sie_block->epoch;
				kvm->arch.epdx = vcpu->arch.sie_block->epdx;
			}
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
			if (vcpu->arch.vsie_block)
				kvm_clock_sync_scb(vcpu->arch.vsie_block,
						   *delta);
		}
	}
	return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_pte_notifier(&gmap_notifier);
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
	gmap_register_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_pte_notifier(&gmap_notifier);
	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

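/*
 * plo_test_bit() probes one PERFORM LOCKED OPERATION subfunction. Setting
 * bit 0x100 in GR0 selects the "test bit" form, so the parameter operands
 * are ignored; condition code 0 means subfunction @nr is available.
 */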
static inline int plo_test_bit(unsigned char nr)
{
	unsigned long function = (unsigned long)nr | 0x100;
	int cc;

	asm volatile(
		"	lgr	0,%[function]\n"
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: [function] "d" (function)
		: "cc", "0");
	return cc == 0;
}

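/*
 * __insn32_query() runs the query variant of a 32-bit opcode: GR0 == 0
 * selects the query subfunction and GR1 holds the address of the buffer
 * that receives the subfunction bitmap.
 */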
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
	asm volatile(
		"	lghi	0,0\n"
		"	lgr	1,%[query]\n"
		/* Parameter registers are ignored */
		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
		:
		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
		: "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938 /* SORT LISTS */
#define INSN_DFLTCC 0xb939 /* DEFLATE CONVERSION CALL */

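/*
 * Probe which facilities and CPACF/PLO/SORTL/DFLTCC subfunctions the host
 * offers and record them in kvm_s390_available_subfunc and
 * kvm_s390_available_cpu_feat. These masks later bound what a cpu model
 * may enable for a guest.
 */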
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		ptff(kvm_s390_available_subfunc.ptff,
		     sizeof(kvm_s390_available_subfunc.ptff),
		     PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.ppno);

	if (test_facility(146)) /* MSA8 */
		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kma);

	if (test_facility(155)) /* MSA9 */
		__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
			      kvm_s390_available_subfunc.kdsa);

	if (test_facility(150)) /* SORTL */
		__insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

	if (test_facility(151)) /* DFLTCC */
		__insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
	/*
	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
	 */
	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
	    !test_facility(3) || !nested)
		return;
	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
	if (sclp.has_64bscao)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
	if (sclp.has_siif)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
	if (sclp.has_gpere)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
	if (sclp.has_gsls)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
	if (sclp.has_ib)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
	if (sclp.has_cei)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
	if (sclp.has_ibs)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
	if (sclp.has_kss)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
	/*
	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
	 * all skey handling functions read/set the skey from the PGSTE
	 * instead of the real storage key.
	 *
	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
	 * pages being detected as preserved although they are resident.
	 *
	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
	 *
	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
	 *
	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
	 * cannot easily shadow the SCA because of the ipte lock.
	 */
}

int kvm_arch_init(void *opaque)
{
	int rc = -ENOMEM;

	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf_uv)
		goto out;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
		goto out;

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
	if (rc) {
		pr_err("A FLIC registration call failed with rc=%d\n", rc);
		goto out;
	}

	rc = kvm_s390_gib_init(GAL_ISC);
	if (rc)
		goto out;

	return 0;

out:
	kvm_arch_exit();
	return rc;
}

void kvm_arch_exit(void)
{
	kvm_s390_gib_destroy();
	debug_unregister(kvm_s390_dbf);
	debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_IMMEDIATE_EXIT:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
	case KVM_CAP_S390_CMMA_MIGRATION:
	case KVM_CAP_S390_AIS:
	case KVM_CAP_S390_AIS_MIGRATION:
	case KVM_CAP_S390_VCPU_RESETS:
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_S390_DIAG318:
	case KVM_CAP_S390_MEM_OP_EXTENSION:
		r = 1;
		break;
	case KVM_CAP_SET_GUEST_DEBUG2:
		r = KVM_GUESTDBG_VALID_MASK;
		break;
	case KVM_CAP_S390_HPAGE_1M:
		r = 0;
		if (hpage && !kvm_is_ucontrol(kvm))
			r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
	case KVM_CAP_MAX_VCPU_ID:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (!kvm_s390_use_sca_entries())
			r = KVM_MAX_VCPUS;
		else if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		if (ext == KVM_CAP_NR_VCPUS)
			r = min_t(unsigned int, num_online_cpus(), r);
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	case KVM_CAP_S390_GS:
		r = test_facility(133);
		break;
	case KVM_CAP_S390_BPB:
		r = test_facility(82);
		break;
	case KVM_CAP_S390_PROTECTED:
		r = is_prot_virt_host();
		break;
	default:
		r = 0;
	}
	return r;
}

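/*
 * Transfer dirty information for one memslot from the gmap to the memslot's
 * dirty bitmap, working on one guest segment (_PAGE_ENTRIES pages) at a time.
 */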
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
	int i;
	gfn_t cur_gfn, last_gfn;
	unsigned long gaddr, vmaddr;
	struct gmap *gmap = kvm->arch.gmap;
	DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

	/* Loop over all guest segments */
	cur_gfn = memslot->base_gfn;
	last_gfn = memslot->base_gfn + memslot->npages;
	for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
		gaddr = gfn_to_gpa(cur_gfn);
		vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
		if (kvm_is_error_hva(vmaddr))
			continue;

		bitmap_zero(bitmap, _PAGE_ENTRIES);
		gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
		for (i = 0; i < _PAGE_ENTRIES; i++) {
			if (test_bit(i, bitmap))
				mark_page_dirty(kvm, cur_gfn + i);
		}

		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
	}
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			if (test_facility(134)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 134);
				set_kvm_facility(kvm->arch.model.fac_list, 134);
			}
			if (test_facility(135)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 135);
				set_kvm_facility(kvm->arch.model.fac_list, 135);
			}
			if (test_facility(148)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 148);
				set_kvm_facility(kvm->arch.model.fac_list, 148);
			}
			if (test_facility(152)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 152);
				set_kvm_facility(kvm->arch.model.fac_list, 152);
			}
			if (test_facility(192)) {
				set_kvm_facility(kvm->arch.model.fac_mask, 192);
				set_kvm_facility(kvm->arch.model.fac_list, 192);
			}
			r = 0;
		} else
			r = -EINVAL;
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_AIS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else {
			set_kvm_facility(kvm->arch.model.fac_mask, 72);
			set_kvm_facility(kvm->arch.model.fac_list, 72);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_GS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(133)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 133);
			set_kvm_facility(kvm->arch.model.fac_list, 133);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_HPAGE_1M:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			r = -EBUSY;
		else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
			r = -EINVAL;
		else {
			r = 0;
			mmap_write_lock(kvm->mm);
			kvm->mm->context.allow_gmap_hpage_1m = 1;
			mmap_write_unlock(kvm->mm);
			/*
			 * We might have to create fake 4k page
			 * tables. To avoid that the hardware works on
			 * stale PGSTEs, we emulate these instructions.
			 */
			kvm->arch.use_skf = 0;
			kvm->arch.use_pfmfi = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus)
			ret = -EBUSY;
		else if (kvm->mm->context.allow_gmap_hpage_1m)
			ret = -EINVAL;
		else {
			kvm->arch.use_cmma = 1;
			/* Not compatible with cmma. */
			kvm->arch.use_pfmfi = 0;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -ENXIO;
		if (!sclp.has_cmma)
			break;
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_create takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (!kvm->created_vcpus) {
			/* gmap_create will round the limit up */
			struct gmap *new = gmap_create(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_remove(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	kvm_s390_vcpu_block_all(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		/* recreate the shadow crycb by leaving the VSIE handler */
		kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
	}

	kvm_s390_vcpu_unblock_all(kvm);
}

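/*
 * Handle the KVM_S390_VM_CRYPTO attribute group: generate or clear the AES
 * and DEA wrapping key masks and flip the APIE setting under kvm->lock, then
 * run kvm_s390_vcpu_crypto_reset_all() so every vcpu picks up the new state.
 */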
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		if (!test_kvm_facility(kvm, 76)) {
			mutex_unlock(&kvm->lock);
			return -EINVAL;
		}
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 1;
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_APIE:
		if (!ap_instructions_available()) {
			mutex_unlock(&kvm->lock);
			return -EOPNOTSUPP;
		}
		kvm->arch.crypto.apie = 0;
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_s390_vcpu_crypto_reset_all(kvm);
	mutex_unlock(&kvm->lock);
	return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
	unsigned long cx;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(cx, vcpu, kvm)
		kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	int res = -ENXIO;

	mutex_lock(&kvm->slots_lock);
	switch (attr->attr) {
	case KVM_S390_VM_MIGRATION_START:
		res = kvm_s390_vm_start_migration(kvm);
		break;
	case KVM_S390_VM_MIGRATION_STOP:
		res = kvm_s390_vm_stop_migration(kvm);
		break;
	default:
		break;
	}
	mutex_unlock(&kvm->slots_lock);

	return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	u64 mig = kvm->arch.migration_mode;

	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
		return -ENXIO;

	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
		return -EFAULT;
	return 0;
}

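/*
 * The KVM_S390_VM_TOD attribute group transports the guest TOD clock.
 * KVM_S390_VM_TOD_EXT carries the epoch index together with the 64-bit TOD
 * base, while the HIGH/LOW attributes transport the two halves separately.
 */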
static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
		return -EINVAL;
	kvm_s390_set_tod_clock(kvm, &gtod);

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);

	return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod = { 0 };

	if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
			   sizeof(gtod.tod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, &gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_set_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

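/*
 * Compute the guest's view of the TOD clock: the host TOD plus the VM's
 * epoch, carrying into the epoch index when the 64-bit sum wraps (only
 * relevant with the multiple-epoch facility, stfle bit 139).
 */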
static void kvm_s390_get_tod_clock(struct kvm *kvm,
				   struct kvm_s390_vm_tod_clock *gtod)
{
	union tod_clock clk;

	preempt_disable();

	store_tod_clock_ext(&clk);

	gtod->tod = clk.tod + kvm->arch.epoch;
	gtod->epoch_idx = 0;
	if (test_kvm_facility(kvm, 139)) {
		gtod->epoch_idx = clk.ei + kvm->arch.epdx;
		if (gtod->tod < clk.tod)
			gtod->epoch_idx += 1;
	}

	preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_tod_clock gtod;

	memset(&gtod, 0, sizeof(gtod));
	kvm_s390_get_tod_clock(kvm, &gtod);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
		 gtod.epoch_idx, gtod.tod);
	return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u32 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}

static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
			 kvm->arch.model.ibc,
			 kvm->arch.model.cpuid);
		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
			 kvm->arch.model.fac_list[0],
			 kvm->arch.model.fac_list[1],
			 kvm->arch.model.fac_list[2]);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}
	bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	mutex_unlock(&kvm->lock);
	VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		mutex_unlock(&kvm->lock);
		return -EBUSY;
	}

	if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
			   sizeof(struct kvm_s390_vm_cpu_subfunc))) {
		mutex_unlock(&kvm->lock);
		return -EFAULT;
	}
	mutex_unlock(&kvm->lock);

	VM_EVENT(kvm, 3, "SET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "SET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "SET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "SET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "SET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "SET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "SET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "SET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "SET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "SET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 kvm->arch.model.fac_list[0],
		 kvm->arch.model.fac_list[1],
		 kvm->arch.model.fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;
	kfree(proc);
out:
	return ret;
}

static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
	if (!mach) {
		ret = -ENOMEM;
		goto out;
	}
	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
	       sizeof(stfle_fac_list));
	VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx",
		 kvm->arch.model.ibc,
		 kvm->arch.model.cpuid);
	VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_mask[0],
		 mach->fac_mask[1],
		 mach->fac_mask[2]);
	VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx",
		 mach->fac_list[0],
		 mach->fac_list[1],
		 mach->fac_list[2]);
	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;
	kfree(mach);
out:
	return ret;
}

static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "GET: host feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
		 data.feat[0],
		 data.feat[1],
		 data.feat[2]);
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: guest PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
	VM_EVENT(kvm, 3, "GET: guest PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: guest KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
	VM_EVENT(kvm, 3, "GET: guest KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: guest KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: guest PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: guest PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: guest KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
	VM_EVENT(kvm, 3, "GET: guest KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: guest SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
		 ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "GET: host PLO subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
	VM_EVENT(kvm, 3, "GET: host PTFF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
	VM_EVENT(kvm, 3, "GET: host KMAC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
	VM_EVENT(kvm, 3, "GET: host KMC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
	VM_EVENT(kvm, 3, "GET: host KM subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
	VM_EVENT(kvm, 3, "GET: host KIMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
	VM_EVENT(kvm, 3, "GET: host KLMD subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
	VM_EVENT(kvm, 3, "GET: host PCKMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
	VM_EVENT(kvm, 3, "GET: host KMCTR subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
	VM_EVENT(kvm, 3, "GET: host KMF subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
	VM_EVENT(kvm, 3, "GET: host KMO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
	VM_EVENT(kvm, 3, "GET: host PCC subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
	VM_EVENT(kvm, 3, "GET: host PPNO subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
	VM_EVENT(kvm, 3, "GET: host KMA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
	VM_EVENT(kvm, 3, "GET: host KDSA subfunc 0x%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
	VM_EVENT(kvm, 3, "GET: host SORTL subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
	VM_EVENT(kvm, 3, "GET: host DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
		 ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);

	return 0;
}

static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}

static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			ret = sclp.has_cmma ? 0 : -ENXIO;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		case KVM_S390_VM_CRYPTO_ENABLE_APIE:
		case KVM_S390_VM_CRYPTO_DISABLE_APIE:
			ret = ap_instructions_available() ? 0 : -ENXIO;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_MIGRATION:
		ret = 0;
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

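/*
 * Read guest storage keys for a range of guest frames into a userspace
 * buffer. Returns KVM_S390_GET_SKEYS_NONE if the guest is not using
 * storage keys at all.
 */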
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}

static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			r = fixup_user_fault(current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);
out:
	kvfree(keys);
	return r;
}

/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)

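/*
 * Peek at the CMMA page states without touching the migration state: copy
 * the pgste-based values for consecutive guest frames into the result
 * buffer, stopping at the first frame without a valid host mapping.
 */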
static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			      u8 *res, unsigned long bufsize)
{
	unsigned long pgstev, hva, cur_gfn = args->start_gfn;

	args->count = 0;
	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		/*
		 * We return an error if the first value was invalid, but we
		 * return successfully if at least one value was copied.
		 */
		if (kvm_is_error_hva(hva))
			return args->count ? 0 : -EFAULT;
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		res[args->count++] = (pgstev >> 24) & 0x43;
		cur_gfn++;
	}

	return 0;
}

static struct kvm_memory_slot *gfn_to_memslot_approx(struct kvm_memslots *slots,
						     gfn_t gfn)
{
	return ____gfn_to_memslot(slots, gfn, true);
}

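/*
 * Starting at cur_gfn, find the next guest frame whose CMMA state is dirty
 * in the per-slot shadow bitmaps (the second half of the dirty bitmap),
 * walking the gfn tree and wrapping around above the highest slot.
 */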
static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
					      unsigned long cur_gfn)
{
	struct kvm_memory_slot *ms = gfn_to_memslot_approx(slots, cur_gfn);
	unsigned long ofs = cur_gfn - ms->base_gfn;
	struct rb_node *mnode = &ms->gfn_node[slots->node_idx];

	if (ms->base_gfn + ms->npages <= cur_gfn) {
		mnode = rb_next(mnode);
		/* If we are above the highest slot, wrap around */
		if (!mnode)
			mnode = rb_first(&slots->gfn_tree);

		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
		ofs = 0;
	}
	ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
	while (ofs >= ms->npages && (mnode = rb_next(mnode))) {
		ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
		ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages);
	}
	return ms->base_gfn + ofs;
}

static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
			     u8 *res, unsigned long bufsize)
{
	unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
	struct kvm_memslots *slots = kvm_memslots(kvm);
	struct kvm_memory_slot *ms;

	if (unlikely(kvm_memslots_empty(slots)))
		return 0;

	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
	ms = gfn_to_memslot(kvm, cur_gfn);
	args->count = 0;
	args->start_gfn = cur_gfn;
	if (!ms)
		return 0;
	next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
	mem_end = kvm_s390_get_gfn_end(slots);

	while (args->count < bufsize) {
		hva = gfn_to_hva(kvm, cur_gfn);
		if (kvm_is_error_hva(hva))
			return 0;
		/* Decrement only if we actually flipped the bit to 0 */
		if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
			atomic64_dec(&kvm->arch.cmma_dirty_pages);
		if (get_pgste(kvm->mm, hva, &pgstev) < 0)
			pgstev = 0;
		/* Save the value */
		res[args->count++] = (pgstev >> 24) & 0x43;
		/* If the next bit is too far away, stop. */
		if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
			return 0;
		/* If we reached the previous "next", find the next one */
		if (cur_gfn == next_gfn)
			next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
		/* Reached the end of memory or of the buffer, stop */
		if ((next_gfn >= mem_end) ||
		    (next_gfn - args->start_gfn >= bufsize))
			return 0;
		cur_gfn++;
		/* Reached the end of the current memslot, take the next one. */
		if (cur_gfn - ms->base_gfn >= ms->npages) {
			ms = gfn_to_memslot(kvm, cur_gfn);
			if (!ms)
				return 0;
		}
	}
	return 0;
}

/*
 * This function searches for the next page with dirty CMMA attributes, and
 * saves the attributes in the buffer up to either the end of the buffer or
 * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
 * no trailing clean bytes are saved.
 * In case no dirty bits were found, or if CMMA was not enabled or used, the
 * output buffer will indicate 0 as length.
 */
static int kvm_s390_get_cmma_bits(struct kvm *kvm,
				  struct kvm_s390_cmma_log *args)
{
	unsigned long bufsize;
	int srcu_idx, peek, ret;
	u8 *values;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* Invalid/unsupported flags were specified */
	if (args->flags & ~KVM_S390_CMMA_PEEK)
		return -EINVAL;
	/* Migration mode query, and we are not doing a migration */
	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
	if (!peek && !kvm->arch.migration_mode)
		return -EINVAL;
	/* CMMA is disabled or was not used, or the buffer has length zero */
	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
	if (!bufsize || !kvm->mm->context.uses_cmm) {
		memset(args, 0, sizeof(*args));
		return 0;
	}
	/* We are not peeking, and there are no dirty pages */
	if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
		memset(args, 0, sizeof(*args));
		return 0;
	}

	values = vmalloc(bufsize);
	if (!values)
		return -ENOMEM;

	mmap_read_lock(kvm->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	if (peek)
		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
	else
		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(kvm->mm);

	if (kvm->arch.migration_mode)
		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
	else
		args->remaining = 0;

	if (copy_to_user((void __user *)args->values, values, args->count))
		ret = -EFAULT;

	vfree(values);
	return ret;
}
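/*
 * A minimal userspace retrieval loop for reference (illustrative only;
 * vm_fd, the buffer size and the consume() helper are assumptions, the
 * ABI is struct kvm_s390_cmma_log from <linux/kvm.h>):
 *
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.count = 4096,
 *		.values = (__u64)buf,
 *	};
 *	do {
 *		if (ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log) < 0)
 *			break;
 *		consume(buf, log.count, log.start_gfn);
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 */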
/*
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 */
static int kvm_s390_set_cmma_bits(struct kvm *kvm,
				  const struct kvm_s390_cmma_log *args)
{
	unsigned long hva, mask, pgstev, i;
	uint8_t *bits;
	int srcu_idx, r = 0;

	mask = args->mask;

	if (!kvm->arch.use_cmma)
		return -ENXIO;
	/* invalid/unsupported flags */
	if (args->flags != 0)
		return -EINVAL;
	/* Enforce sane limit on memory allocation */
	if (args->count > KVM_S390_CMMA_SIZE_MAX)
		return -EINVAL;
	/* Nothing to do */
	if (args->count == 0)
		return 0;

	bits = vmalloc(array_size(sizeof(*bits), args->count));
	if (!bits)
		return -ENOMEM;

	r = copy_from_user(bits, (void __user *)args->values, args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	mmap_read_lock(kvm->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		pgstev = bits[i];
		pgstev = pgstev << 24;
		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
		set_pgste_bits(kvm->mm, hva, mask, pgstev);
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(kvm->mm);

	if (!kvm->mm->context.uses_cmm) {
		mmap_write_lock(kvm->mm);
		kvm->mm->context.uses_cmm = 1;
		mmap_write_unlock(kvm->mm);
	}
out:
	vfree(bits);
	return r;
}
static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;
	u16 rc, rrc;
	int ret = 0;

	/*
	 * We ignore failures and try to destroy as many CPUs as possible.
	 * At the same time we must not free the assigned resources when
	 * this fails, as the ultravisor has still access to that memory.
	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
	 * behind.
	 * We want to return the first failure rc and rrc, though.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
			*rcp = rc;
			*rrcp = rrc;
			ret = -EIO;
		}
		mutex_unlock(&vcpu->mutex);
	}
	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
	if (use_gisa)
		kvm_s390_gisa_enable(kvm);
	return ret;
}
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	unsigned long i;
	int r = 0;
	u16 dummy;

	struct kvm_vcpu *vcpu;

	/* Disable the GISA if the ultravisor does not support AIV. */
	if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
		kvm_s390_gisa_disable(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
		mutex_unlock(&vcpu->mutex);
		if (r)
			break;
	}
	if (r)
		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	return r;
}
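/*
 * Design note: the conversion to protected mode is all-or-nothing. If
 * any vcpu fails kvm_s390_pv_create_cpu(), the vcpus that already made
 * it are rolled back via kvm_s390_cpus_from_pv(); its rc/rrc results go
 * to a dummy so the original creation error is what the caller reports.
 */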
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
	int r = 0;
	u16 dummy;
	void __user *argp = (void __user *)cmd->data;

	switch (cmd->cmd) {
	case KVM_PV_ENABLE: {
		r = -EINVAL;
		if (kvm_s390_pv_is_protected(kvm))
			break;

		/*
		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
		 *  esca, we need no cleanup in the error cases below
		 */
		r = sca_switch_to_extended(kvm);
		if (r)
			break;

		mmap_write_lock(current->mm);
		r = gmap_mark_unmergeable();
		mmap_write_unlock(current->mm);
		if (r)
			break;

		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			break;

		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
		if (r)
			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);

		/* we need to block service interrupts from now on */
		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_DISABLE: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
		/*
		 * If a CPU could not be destroyed, destroy VM will also fail.
		 * There is no point in trying to destroy it. Instead return
		 * the rc and rrc from the first CPU that failed destroying.
		 */
		if (r)
			break;
		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);

		/* no need to block service interrupts any more */
		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
		break;
	}
	case KVM_PV_SET_SEC_PARMS: {
		struct kvm_s390_pv_sec_parm parms = {};
		void *hdr;

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = -EFAULT;
		if (copy_from_user(&parms, argp, sizeof(parms)))
			break;

		/* Currently restricted to 8KB */
		r = -EINVAL;
		if (parms.length > PAGE_SIZE * 2)
			break;

		r = -ENOMEM;
		hdr = vmalloc(parms.length);
		if (!hdr)
			break;

		r = -EFAULT;
		if (!copy_from_user(hdr, (void __user *)parms.origin,
				    parms.length))
			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
						      &cmd->rc, &cmd->rrc);

		vfree(hdr);
		break;
	}
	case KVM_PV_UNPACK: {
		struct kvm_s390_pv_unp unp = {};

		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
			break;

		r = -EFAULT;
		if (copy_from_user(&unp, argp, sizeof(unp)))
			break;

		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
				       &cmd->rc, &cmd->rrc);
		break;
	}
	case KVM_PV_VERIFY: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
			     cmd->rrc);
		break;
	}
	case KVM_PV_PREP_RESET: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	case KVM_PV_UNSHARE_ALL: {
		r = -EINVAL;
		if (!kvm_s390_pv_is_protected(kvm))
			break;

		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
			     cmd->rc, cmd->rrc);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}
static bool access_key_invalid(u8 access_key)
{
	return access_key > 0xf;
}
static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	u64 supported_flags;
	void *tmpbuf = NULL;
	int r, srcu_idx;

	supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
			  | KVM_S390_MEMOP_F_CHECK_ONLY;
	if (mop->flags & ~supported_flags || !mop->size)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (kvm_s390_pv_is_protected(kvm))
		return -EINVAL;
	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
		if (access_key_invalid(mop->key))
			return -EINVAL;
	} else {
		mop->key = 0;
	}
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);

	if (kvm_is_error_gpa(kvm, mop->gaddr)) {
		r = PGM_ADDRESSING;
		goto out_unlock;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_ABSOLUTE_READ: {
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
		} else {
			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
						      mop->size, GACC_FETCH, mop->key);
			if (r == 0) {
				if (copy_to_user(uaddr, tmpbuf, mop->size))
					r = -EFAULT;
			}
		}
		break;
	}
	case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
		} else {
			if (copy_from_user(tmpbuf, uaddr, mop->size)) {
				r = -EFAULT;
				break;
			}
			r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
						      mop->size, GACC_STORE, mop->key);
		}
		break;
	}
	default:
		r = -EINVAL;
	}

out_unlock:
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	vfree(tmpbuf);
	return r;
}
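/*
 * A minimal userspace sketch of an absolute read for reference
 * (illustrative only; vm_fd, the address and 'buf' are assumptions,
 * the ABI is struct kvm_s390_mem_op from <linux/kvm.h>):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = 0x1000,
 *		.size  = 256,
 *		.op    = KVM_S390_MEMOP_ABSOLUTE_READ,
 *		.buf   = (__u64)buf,
 *	};
 *	int rc = ioctl(vm_fd, KVM_S390_MEM_OP, &op);
 *
 * A positive return value is a program interruption code (e.g.
 * PGM_ADDRESSING), a negative one is a host errno.
 */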
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	case KVM_S390_GET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_get_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		if (!r) {
			r = copy_to_user(argp, &args, sizeof(args));
			if (r)
				r = -EFAULT;
		}
		break;
	}
	case KVM_S390_SET_CMMA_BITS: {
		struct kvm_s390_cmma_log args;

		r = -EFAULT;
		if (copy_from_user(&args, argp, sizeof(args)))
			break;
		mutex_lock(&kvm->slots_lock);
		r = kvm_s390_set_cmma_bits(kvm, &args);
		mutex_unlock(&kvm->slots_lock);
		break;
	}
	case KVM_S390_PV_COMMAND: {
		struct kvm_pv_cmd args;

		/* protvirt means user cpu state */
		kvm_s390_set_user_cpu_state_ctrl(kvm);
		r = 0;
		if (!is_prot_virt_host()) {
			r = -EINVAL;
			break;
		}
		if (copy_from_user(&args, argp, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		if (args.flags) {
			r = -EINVAL;
			break;
		}
		mutex_lock(&kvm->lock);
		r = kvm_s390_handle_pv(kvm, &args);
		mutex_unlock(&kvm->lock);
		if (copy_to_user(argp, &args, sizeof(args))) {
			r = -EFAULT;
			break;
		}
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vm_mem_op(kvm, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_apxa_installed(void)
{
	struct ap_config_info info;

	if (ap_instructions_available()) {
		if (ap_qci(&info) == 0)
			return info.apxa;
	}

	return 0;
}
/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *	     AP extended addressing (APXA) facility are installed.
 * Format 1: The APXA facility is not installed but the MSAX3 facility is.
 * Format 2: Both the APXA and MSAX3 facilities are installed
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Check whether MSAX3 is installed */
	if (!test_kvm_facility(kvm, 76))
		return;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
/**
 * kvm_arch_crypto_set_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 *	 to be set.
 * @apm: the mask identifying the accessible AP adapters
 * @aqm: the mask identifying the accessible AP domains
 * @adm: the mask identifying the accessible AP control domains
 *
 * Set the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 *	 function.
 */
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
			       unsigned long *aqm, unsigned long *adm)
{
	struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;

	kvm_s390_vcpu_block_all(kvm);

	switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
	case CRYCB_FORMAT2: /* APCB1 use 256 bits */
		memcpy(crycb->apcb1.apm, apm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
			 apm[0], apm[1], apm[2], apm[3]);
		memcpy(crycb->apcb1.aqm, aqm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
			 aqm[0], aqm[1], aqm[2], aqm[3]);
		memcpy(crycb->apcb1.adm, adm, 32);
		VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
			 adm[0], adm[1], adm[2], adm[3]);
		break;
	case CRYCB_FORMAT1:
	case CRYCB_FORMAT0: /* Fall through both use APCB0 */
		memcpy(crycb->apcb0.apm, apm, 8);
		memcpy(crycb->apcb0.aqm, aqm, 2);
		memcpy(crycb->apcb0.adm, adm, 2);
		VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
			 apm[0], *((unsigned short *)aqm),
			 *((unsigned short *)adm));
		break;
	default:	/* Can not happen */
		break;
	}

	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
/**
 * kvm_arch_crypto_clear_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 *	 to be cleared.
 *
 * Clear the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 *	 function.
 */
void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
	kvm_s390_vcpu_block_all(kvm);

	memset(&kvm->arch.crypto.crycb->apcb0, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb0));
	memset(&kvm->arch.crypto.crycb->apcb1, 0,
	       sizeof(kvm->arch.crypto.crycb->apcb1));

	VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
	/* recreate the shadow crycb for each vcpu */
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
	kvm_s390_vcpu_unblock_all(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);
	init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);

	if (!test_kvm_facility(kvm, 76))
		return;

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
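/*
 * Note: the AES/DEA wrapping key masks are freshly randomized for every
 * VM, so keys wrapped inside one guest are useless in any other guest
 * or on the host; userspace can still clear or regenerate them via the
 * KVM_S390_VM_CRYPTO attributes.
 */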
static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	/* start with basic SCA */
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	mutex_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	mutex_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	kvm->arch.sie_page2->kvm = kvm;
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;

	for (i = 0; i < kvm_s390_fac_size(); i++) {
		kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
					      (kvm_s390_fac_base[i] |
					       kvm_s390_fac_ext[i]);
		kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
					      kvm_s390_fac_base[i];
	}
	kvm->arch.model.subfuncs = kvm_s390_available_subfunc;

	/* we are always in czam mode - even on pre z14 machines */
	set_kvm_facility(kvm->arch.model.fac_mask, 138);
	set_kvm_facility(kvm->arch.model.fac_list, 138);
	/* we emulate STHYI in kvm */
	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);
	if (MACHINE_HAS_TLB_GUEST) {
		set_kvm_facility(kvm->arch.model.fac_mask, 147);
		set_kvm_facility(kvm->arch.model.fac_list, 147);
	}

	if (css_general_characteristics.aiv && test_facility(65))
		set_kvm_facility(kvm->arch.model.fac_mask, 65);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	mutex_init(&kvm->arch.float_int.ais_lock);
	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_SIZE_MAX;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.use_pfmfi = sclp.has_pfmfi;
	kvm->arch.use_skf = sclp.has_skey;
	spin_lock_init(&kvm->arch.start_stop_lock);
	kvm_s390_vsie_init(kvm);
	if (use_gisa)
		kvm_s390_gisa_init(kvm);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	u16 rc, rrc;

	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	/* We can not hold the vcpu mutex here, we are already dying */
	if (kvm_s390_pv_cpu_get_handle(vcpu))
		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
	free_page((unsigned long)(vcpu->arch.sie_block));
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	u16 rc, rrc;

	kvm_destroy_vcpus(kvm);
	sca_dispose(kvm);
	kvm_s390_gisa_destroy(kvm);
	/*
	 * We are already at the end of life and kvm->lock is not taken.
	 * This is ok as the file descriptor is closed by now and nobody
	 * can mess with the pv state. To avoid lockdep_assert_held from
	 * complaining we do not use kvm_s390_pv_is_protected.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries())
		return;
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	if (!kvm_s390_use_sca_entries()) {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		/* we still need the basic sca for the ipte control */
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		return;
	}
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned long vcpu_idx;
	u32 scaol, scaoh;

	if (kvm->arch.use_esca)
		return 0;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
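/*
 * Capacity note: a basic SCA addresses KVM_S390_BSCA_CPU_SLOTS (64)
 * vcpus, an extended SCA KVM_S390_ESCA_CPU_SLOTS (248). The switch is
 * deliberately one-way, and all vcpus are blocked while their
 * scaoh/scaol pointers are rewritten, so none can run with a stale SCA.
 */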
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (!kvm_s390_use_sca_entries()) {
		if (id < KVM_MAX_VCPUS)
			return true;
		return false;
	}
	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
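/*
 * Note on the loop above: raw_read_seqcount() is used instead of the
 * spinning read_seqcount_begin(), so the reader may also get hold of an
 * odd (write-in-progress) sequence value; masking the low bit in
 * read_seqcount_retry() then forces a retry until a stable, even count
 * is observed. The WARN_ON_ONCE documents the one case that cannot
 * work: a read on the vcpu's own CPU while that CPU is inside the
 * write side.
 */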
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{

	gmap_enable(vcpu->arch.enabled_gmap);
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
	vcpu->arch.enabled_gmap = gmap_get_enabled();
	gmap_disable(vcpu->arch.enabled_gmap);

}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
	/* make vcpu_load load the right gmap on the first trigger */
	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
{
	if (test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
	    test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo))
		return true;
	return false;
}

static bool kvm_has_pckmo_ecc(struct kvm *kvm)
{
	/* At least one ECC subfunction must be present */
	return kvm_has_pckmo_subfunc(kvm, 32) ||
	       kvm_has_pckmo_subfunc(kvm, 33) ||
	       kvm_has_pckmo_subfunc(kvm, 34) ||
	       kvm_has_pckmo_subfunc(kvm, 40) ||
	       kvm_has_pckmo_subfunc(kvm, 41);

}
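/*
 * The function codes checked above are, per the MSA9 definition of
 * PCKMO, the ECC encrypted-key functions: 32-34 cover the ECC-P256/
 * P384/P521 keys and 40/41 the Ed25519/Ed448 keys. A single present
 * code is enough, since they all use the same AES wrapping key.
 */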
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	/*
	 * If the AP instructions are not being interpreted and the MSAX3
	 * facility is not configured for the guest, there is nothing to set up.
	 */
	if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
	vcpu->arch.sie_block->eca &= ~ECA_APIE;
	vcpu->arch.sie_block->ecd &= ~ECD_ECC;

	if (vcpu->kvm->arch.crypto.apie)
		vcpu->arch.sie_block->eca |= ECA_APIE;

	/* Set up protected key support */
	if (vcpu->kvm->arch.crypto.aes_kw) {
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
		/* ecc is also wrapped with AES key */
		if (kvm_has_pckmo_ecc(vcpu->kvm))
			vcpu->arch.sie_block->ecd |= ECD_ECC;
	}

	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}

int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;
	return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;
	u16 uvrc, uvrrc;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
	else if (test_kvm_facility(vcpu->kvm, 8))
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);

	kvm_s390_vcpu_setup_model(vcpu);

	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
	if (MACHINE_HAS_ESOP)
		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= ECB_SRSI;
	if (test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= ECB_TE;
	if (!kvm_is_ucontrol(vcpu->kvm))
		vcpu->arch.sie_block->ecb |= ECB_SPECI;

	if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
	if (test_kvm_facility(vcpu->kvm, 130))
		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
	if (sclp.has_cei)
		vcpu->arch.sie_block->eca |= ECA_CEI;
	if (sclp.has_ib)
		vcpu->arch.sie_block->eca |= ECA_IB;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= ECA_SII;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= ECA_SIGPI;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= ECA_VX;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
	}
	if (test_kvm_facility(vcpu->kvm, 139))
		vcpu->arch.sie_block->ecd |= ECD_MEF;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
	if (vcpu->arch.sie_block->gd) {
		vcpu->arch.sie_block->eca |= ECA_AIV;
		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
	}
	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
					| SDNXC;
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;

	if (sclp.has_kss)
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
	else
		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	vcpu->arch.sie_block->hpid = HPID_KVM;

	kvm_s390_vcpu_crypto_setup(vcpu);

	mutex_lock(&vcpu->kvm->lock);
	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
		if (rc)
			kvm_s390_vcpu_unsetup_cmma(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);

	return rc;
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		return -EINVAL;
	return 0;
}
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
	struct sie_page *sie_page;
	int rc;

	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
	if (!sie_page)
		return -ENOMEM;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	/* the real guest size will always be smaller than msl */
	vcpu->arch.sie_block->mso = 0;
	vcpu->arch.sie_block->msl = sclp.hamax;

	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.sie_block->gd = kvm_s390_get_gisa_desc(vcpu->kvm);
	seqcount_init(&vcpu->arch.cputm_seqcount);

	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT |
				    KVM_SYNC_DIAG318;
	kvm_s390_set_prefix(vcpu, 0);
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	if (test_kvm_facility(vcpu->kvm, 82))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
	if (test_kvm_facility(vcpu->kvm, 133))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
	if (test_kvm_facility(vcpu->kvm, 156))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm)) {
		rc = __kvm_ucontrol_vcpu_init(vcpu);
		if (rc)
			goto out_free_sie_block;
	}

	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);

	rc = kvm_s390_vcpu_setup(vcpu);
	if (rc)
		goto out_ucontrol_uninit;
	return 0;

out_ucontrol_uninit:
	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
	return rc;
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}

bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
{
	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
{
	return atomic_read(&vcpu->arch.sie_block->prog20) &
	       (PROG_BLOCK_SIE | PROG_REQUEST);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately. */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	__kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	unsigned long i;

	if (gmap_is_shadow(gmap))
		return;
	if (start >= 1UL << 31)
		/* We are only interested in prefix pages */
		return;
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
				   start, end);
			kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
		}
	}
}
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
	/* do not poll with more than halt_poll_max_steal percent of steal time */
	if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
	    READ_ONCE(halt_poll_max_steal)) {
		vcpu->stat.halt_no_poll_steal++;
		return true;
	}
	return false;
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));

	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	/* Initial reset is a superset of the normal reset */
	kvm_arch_vcpu_ioctl_normal_reset(vcpu);

	/*
	 * This equals initial cpu reset in pop, but we don't switch to ESA.
	 * We do not only reset the internal data, but also ...
	 */
	vcpu->arch.sie_block->gpsw.mask = 0;
	vcpu->arch.sie_block->gpsw.addr = 0;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0;
	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;

	/* ... the data in sync regs */
	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
	vcpu->run->s.regs.ckc = 0;
	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
	vcpu->run->psw_addr = 0;
	vcpu->run->psw_mask = 0;
	vcpu->run->s.regs.todpr = 0;
	vcpu->run->s.regs.cputm = 0;
	vcpu->run->s.regs.ckc = 0;
	vcpu->run->s.regs.pp = 0;
	vcpu->run->s.regs.gbea = 1;
	vcpu->run->s.regs.fpc = 0;
	/*
	 * Do not reset these registers in the protected case, as some of
	 * them are overlayed and they are not accessible in this case
	 * anyway.
	 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Clear reset is a superset of the initial reset */
	kvm_arch_vcpu_ioctl_initial_reset(vcpu);

	memset(&regs->gprs, 0, sizeof(regs->gprs));
	memset(&regs->vrs, 0, sizeof(regs->vrs));
	memset(&regs->acrs, 0, sizeof(regs->acrs));
	memset(&regs->gscb, 0, sizeof(regs->gscb));

	regs->etoken = 0;
	regs->etoken_extension = 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	vcpu_load(vcpu);
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	vcpu_load(vcpu);

	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));

	vcpu_put(vcpu);
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	int ret = 0;

	vcpu_load(vcpu);

	if (test_fp_ctl(fpu->fpc)) {
		ret = -EINVAL;
		goto out;
	}
	vcpu->run->s.regs.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
				 (freg_t *) fpu->fprs);
	else
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));

out:
	vcpu_put(vcpu);
	return ret;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	vcpu_load(vcpu);

	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	else
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	fpu->fpc = vcpu->run->s.regs.fpc;

	vcpu_put(vcpu);
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu_load(vcpu);

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
		rc = -EINVAL;
		goto out;
	}
	if (!sclp.has_gpere) {
		rc = -EINVAL;
		goto out;
	}

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
	}

out:
	vcpu_put(vcpu);
	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int ret;

	vcpu_load(vcpu);

	/* CHECK_STOP and LOAD are not supported yet */
	ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				      KVM_MP_STATE_OPERATING;

	vcpu_put(vcpu);
	return ret;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	vcpu_load(vcpu);

	/* user space knows about this interface - let it control the state */
	kvm_s390_set_user_cpu_state_ctrl(vcpu->kvm);

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		rc = kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		rc = kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
			rc = -ENXIO;
			break;
		}
		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
		break;
	case KVM_MP_STATE_CHECK_STOP:
		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	vcpu_put(vcpu);
	return rc;
}
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
	return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!kvm_request_pending(vcpu))
		return 0;
	/*
	 * If the guest prefix changed, re-arm the ipte notifier for the
	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) {
		int rc;
		rc = gmap_mprotect_notify(vcpu->arch.gmap,
					  kvm_s390_get_prefix(vcpu),
					  PAGE_SIZE * 2, PROT_WRITE);
		if (rc) {
			kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
			return rc;
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
		/*
		 * Disable CMM virtualization; we will emulate the ESSA
		 * instruction manually, in order to provide additional
		 * functionalities needed for live migration.
		 */
		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
		/*
		 * Re-enable CMM virtualization if CMMA is available and
		 * CMM has been used.
		 */
		if ((vcpu->kvm->arch.use_cmma) &&
		    (vcpu->kvm->mm->context.uses_cmm))
			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
		goto retry;
	}

	/* nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
	/* we left the vsie handler, nothing to do, just clear the request */
	kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);

	return 0;
}
static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	struct kvm_vcpu *vcpu;
	union tod_clock clk;
	unsigned long i;

	preempt_disable();

	store_tod_clock_ext(&clk);

	kvm->arch.epoch = gtod->tod - clk.tod;
	kvm->arch.epdx = 0;
	if (test_kvm_facility(kvm, 139)) {
		kvm->arch.epdx = gtod->epoch_idx - clk.ei;
		if (kvm->arch.epoch > gtod->tod)
			kvm->arch.epdx -= 1;
	}

	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
		vcpu->arch.sie_block->epdx = kvm->arch.epdx;
	}

	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
}
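/*
 * Note on the epoch index: the epoch is computed modulo 2^64, so when
 * the requested guest TOD lies before the host TOD the subtraction
 * wraps (kvm->arch.epoch > gtod->tod) and the epoch extension has to
 * borrow, hence the epdx -= 1 above.
 */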
void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	mutex_lock(&kvm->lock);
	__kvm_s390_set_tod_clock(kvm, gtod);
	mutex_unlock(&kvm->lock);
}

int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	if (!mutex_trylock(&kvm->lock))
		return 0;
	__kvm_s390_set_tod_clock(kvm, gtod);
	mutex_unlock(&kvm->lock);
	return 1;
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);

	return true;
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}
static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return false;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return false;
	if (psw_extint_disabled(vcpu))
		return false;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return false;
	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
		return false;
	if (!vcpu->arch.gmap->pfault_enabled)
		return false;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return false;

	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
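/*
 * The checks above mirror the pfault handshake: userspace must have
 * installed a valid token, the guest PSW must match the
 * pfault_select/compare mask, external interrupts and the service
 * signal submask must be open, and no other interrupt may be pending;
 * otherwise the fault is handled synchronously.
 */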
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	struct mcck_volatile_info *mcck_info;
	struct sie_page *sie_page;

	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (exit_reason == -EINTR) {
		VCPU_EVENT(vcpu, 3, "%s", "machine check");
		sie_page = container_of(vcpu->arch.sie_block,
					struct sie_page, sie_block);
		mcck_info = &sie_page->mcck_info;
		kvm_s390_reinject_machine_check(vcpu, mcck_info);
		return 0;
	}

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		vcpu->stat.pfault_sync++;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}
#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;
	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		guest_enter_irqoff();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(sie_page->pv_grregs,
			       vcpu->run->s.regs.gprs,
			       sizeof(sie_page->pv_grregs));
		}
		if (test_cpu_flag(CIF_FPU))
			load_fpu_regs();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			memcpy(vcpu->run->s.regs.gprs,
			       sie_page->pv_grregs,
			       sizeof(sie_page->pv_grregs));
			/*
			 * We're not allowed to inject interrupts on intercepts
			 * that leave the guest state in an "in-between" state
			 * where the next SIE entry will do a continuation.
			 * Fence interrupts in our "internal" PSW.
			 */
			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
			}
		}
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		guest_exit_irqoff();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}
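/*
 * Note on the interrupt bracketing above: sie64a() itself runs with
 * interrupts enabled; the local_irq_disable()/enable() pairs only make
 * the context-tracking transitions (guest_enter/exit_irqoff) and the
 * cpu timer accounting flips atomic with respect to interrupts.
 */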
static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	struct runtime_instr_cb *riccb;
	struct gs_cb *gscb;

	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
		VCPU_EVENT(vcpu, 3, "setting cpnc to %d", vcpu->arch.diag318_info.cpnc);
	}
	/*
	 * If userspace sets the riccb (e.g. after migration) to a valid state,
	 * we should enable RI here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
	    test_kvm_facility(vcpu->kvm, 64) &&
	    riccb->v &&
	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
	}
	/*
	 * If userspace sets the gscb (e.g. after migration) to non-zero,
	 * we should enable GS here instead of doing the lazy enablement.
	 */
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
	    test_kvm_facility(vcpu->kvm, 133) &&
	    gscb->gssm &&
	    !vcpu->arch.gs_enabled) {
		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
		vcpu->arch.sie_block->ecb |= ECB_GS;
		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
		vcpu->arch.gs_enabled = 1;
	}
	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
	    test_kvm_facility(vcpu->kvm, 82)) {
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
	}
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (current->thread.gs_cb) {
			vcpu->arch.host_gscb = current->thread.gs_cb;
			save_gs_cb(vcpu->arch.host_gscb);
		}
		if (vcpu->arch.gs_enabled) {
			current->thread.gs_cb = (struct gs_cb *)
						&vcpu->run->s.regs.gscb;
			restore_gs_cb(current->thread.gs_cb);
		}
		preempt_enable();
	}
	/* SIE will load etoken directly from SDNX and therefore kvm_run */
}
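/*
 * Transfer all registers that userspace marked dirty in kvm_run into our
 * internal copies before entering SIE. For protected guests, only the
 * condition-code bits of the PSW mask are accepted; see the comment below.
 */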
static void sync_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
	}
	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	/* save host (userspace) fprs/vrs */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	/* Sync fmt2 only data */
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
		sync_regs_fmt2(vcpu);
	} else {
		/*
		 * In several places we have to modify our internal view to
		 * not do things that are disallowed by the ultravisor. For
		 * example we must not inject interrupts after specific exits
		 * (e.g. 112 prefix page not secure). We do this by turning
		 * off the machine check, external and I/O interrupt bits
		 * of our PSW copy. To avoid getting validity intercepts, we
		 * only accept the condition code from userspace.
		 */
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
						   PSW_MASK_CC;
	}

	kvm_run->kvm_dirty_regs = 0;
}
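/*
 * Counterpart to sync_regs_fmt2(): copy format-2 only state back into
 * kvm_run and hand guarded storage back to the host.
 */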
static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
	if (MACHINE_HAS_GS) {
		preempt_disable();
		__ctl_set_bit(2, 4);
		if (vcpu->arch.gs_enabled)
			save_gs_cb(current->thread.gs_cb);
		current->thread.gs_cb = vcpu->arch.host_gscb;
		restore_gs_cb(vcpu->arch.host_gscb);
		if (!vcpu->arch.host_gscb)
			__ctl_clear_bit(2, 4);
		vcpu->arch.host_gscb = NULL;
		preempt_enable();
	}
	/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;

	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	/* Restore will be done lazily at return */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
		store_regs_fmt2(vcpu);
}
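/*
 * Handler for the KVM_RUN ioctl: syncs dirty registers in, runs the SIE
 * loop via __vcpu_run() and stores the register state back into kvm_run
 * before returning to userspace.
 */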
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
	struct kvm_run *kvm_run = vcpu->run;
	int rc;

	if (kvm_run->immediate_exit)
		return -EINTR;
	if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
	    kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
		return -EINVAL;

	vcpu_load(vcpu);
	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
		goto out;
	}
	kvm_sigset_activate(vcpu);
	/*
	 * no need to check the return value of vcpu_start as it can only have
	 * an error for protvirt, but protvirt means user cpu state
	 */
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		rc = -EINVAL;
		goto out;
	}
	sync_regs(vcpu);
	enable_cpu_timer_accounting(vcpu);
	might_fault();
	rc = __vcpu_run(vcpu);
	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}
	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}
	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}
	store_regs(vcpu);
	disable_cpu_timer_accounting(vcpu);
	kvm_sigset_deactivate(vcpu);
out:
	vcpu->stat.exit_userspace++;
	vcpu_put(vcpu);
	return rc;
}
/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * switch in the run ioctl. Let's update our copies before we save
	 * them into the save area.
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}
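/*
 * IBS (used to speed up a VCPU that is the only one running) is toggled
 * through vCPU requests: any opposite pending request is cancelled first
 * via kvm_check_request(), then the new one is made visible to the
 * target vCPU.
 */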
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the operating state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
	/*
	 * The real PSW might have changed due to a RESTART interpreted by the
	 * ultravisor. We block all interrupts and let the next sie exit
	 * refresh our view.
	 */
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, r = 0, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return 0;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* Let's tell the UV that we want to change into the stopped state */
	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
		if (r) {
			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
			return r;
		}
	}

	/*
	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
	 * have been fully processed. This will ensure that the VCPU
	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
	 */
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
	kvm_s390_clear_stop_irq(vcpu);

	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);

		if (!is_vcpu_stopped(tmp)) {
			started_vcpus++;
			started_vcpu = tmp;
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return 0;
}
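/*
 * Per-vCPU KVM_ENABLE_CAP backend; KVM_CAP_S390_CSS_SUPPORT is currently
 * the only capability handled at vCPU scope.
 */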
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	int r = 0;

	if (mop->flags || !mop->size)
		return -EINVAL;
	/* size + offset must not wrap around */
	if (mop->size + mop->sida_offset < mop->size)
		return -EINVAL;
	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
		return -E2BIG;
	if (!kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;

	switch (mop->op) {
	case KVM_S390_MEMOP_SIDA_READ:
		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
				 mop->sida_offset), mop->size))
			r = -EFAULT;
		break;
	case KVM_S390_MEMOP_SIDA_WRITE:
		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
				   mop->sida_offset), uaddr, mop->size))
			r = -EFAULT;
		break;
	}
	return r;
}
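/*
 * Backend for KVM_S390_MEM_OP on logical guest addresses: either just
 * checks accessibility (KVM_S390_MEMOP_F_CHECK_ONLY) or bounces the data
 * through a temporary kernel buffer, honoring storage keys if requested.
 */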
static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
				 struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r = 0;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY
				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;

	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
		return -EINVAL;
	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;
	if (kvm_s390_pv_cpu_is_protected(vcpu))
		return -EINVAL;
	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
		if (access_key_invalid(mop->key))
			return -EINVAL;
	} else {
		mop->key = 0;
	}
	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
					    GACC_FETCH, mop->key);
			break;
		}
		r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					mop->size, mop->key);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
					    GACC_STORE, mop->key);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
					 mop->size, mop->key);
		break;
	}

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}
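/*
 * Dispatches a mem op to the logical-address or SIDA handler; kvm->srcu
 * is held across the operation so memslots cannot change underneath us.
 */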
static long kvm_s390_vcpu_memsida_op(struct kvm_vcpu *vcpu,
				     struct kvm_s390_mem_op *mop)
{
	int r, srcu_idx;

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		r = kvm_s390_vcpu_mem_op(vcpu, mop);
		break;
	case KVM_S390_MEMOP_SIDA_READ:
	case KVM_S390_MEMOP_SIDA_WRITE:
		/* we are locked against sida going away by the vcpu->mutex */
		r = kvm_s390_vcpu_sida_op(vcpu, mop);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	return r;
}
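/*
 * These interrupt-injection ioctls are handled on the async path, i.e.
 * (as far as the generic ioctl code is concerned) without taking the
 * vcpu mutex first; everything else is punted to kvm_arch_vcpu_ioctl()
 * by returning -ENOIOCTLCMD.
 */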
long kvm_arch_vcpu_async_ioctl(struct file *filp,
			       unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			return -EFAULT;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq = {};

		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			return -EFAULT;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		return kvm_s390_inject_vcpu(vcpu, &s390irq);
	}
	}
	return -ENOIOCTLCMD;
}
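/*
 * Main vCPU ioctl multiplexer; runs with the vCPU loaded (the switch is
 * bracketed by vcpu_load()/vcpu_put()).
 */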
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;
	u16 rc, rrc;

	vcpu_load(vcpu);

	switch (ioctl) {
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_store_status_unloaded(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_CLEAR_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_INITIAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET_INITIAL,
					  &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_S390_NORMAL_RESET:
		r = 0;
		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
					  UVC_CMD_CPU_RESET, &rc, &rrc);
			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
				   rc, rrc);
		}
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EINVAL;
		if (kvm_s390_pv_cpu_is_protected(vcpu))
			break;
		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}
		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}
		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		/* do not use irq_state.flags, it will break old QEMUs */
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}

	vcpu_put(vcpu);
	return r;
}
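/*
 * For user-controlled (ucontrol) VMs, userspace may mmap() the vCPU fd at
 * KVM_S390_SIE_PAGE_OFFSET to access the SIE control block; any other
 * access faults with SIGBUS.
 */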
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
	    && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   const struct kvm_memory_slot *old,
				   struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	gpa_t size;

	/* When we are protected, we should not change the memory slots */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;

	if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
		return 0;

	/* A few sanity checks. Memory slots have to start and end at a
	   segment boundary (1MB). The memory in userland may be fragmented
	   into various different vmas. It is okay to mmap() and munmap()
	   stuff in this slot after doing this call at any time */

	if (new->userspace_addr & 0xffffful)
		return -EINVAL;

	size = new->npages * PAGE_SIZE;
	if (size & 0xffffful)
		return -EINVAL;

	if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *old,
				   const struct kvm_memory_slot *new,
				   enum kvm_mr_change change)
{
	int rc = 0;

	switch (change) {
	case KVM_MR_DELETE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		break;
	case KVM_MR_MOVE:
		rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
					old->npages * PAGE_SIZE);
		if (rc)
			break;
		fallthrough;
	case KVM_MR_CREATE:
		rc = gmap_map_segment(kvm->arch.gmap, new->userspace_addr,
				      new->base_gfn * PAGE_SIZE,
				      new->npages * PAGE_SIZE);
		break;
	case KVM_MR_FLAGS_ONLY:
		break;
	default:
		WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
	}
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}
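/*
 * sclp.hmfai appears to encode, two bits per facility-list doubleword, how
 * much of that doubleword remains usable outside the hypervisor. Worked
 * example for the arithmetic below: for block i, (sclp.hmfai << i * 2) >> 30
 * extracts the i-th 2-bit field; a value of 2 shifts the 48-bit base mask
 * right by 2 << 4 = 32 bits, so only 0x000000000000ffffUL of that facility
 * doubleword is passed through to kvm_s390_fac_base in kvm_s390_init().
 */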
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}
static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE is not available\n");
		return -ENODEV;
	}
	if (nested && hpage) {
		pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
		return -EINVAL;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_base[i] |=
			stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");