// SPDX-License-Identifier: GPL-2.0
/*
 * hosting IBM Z kernel virtual machines (s390x)
 *
 * Copyright IBM Corp. 2008, 2020
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/pgtable.h>

#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
#include <asm/uv.h>
#include <asm/fpu/api.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
                           (KVM_MAX_VCPUS + LOCAL_IRQS))

const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
        KVM_GENERIC_VM_STATS(),
        STATS_DESC_COUNTER(VM, inject_io),
        STATS_DESC_COUNTER(VM, inject_float_mchk),
        STATS_DESC_COUNTER(VM, inject_pfault_done),
        STATS_DESC_COUNTER(VM, inject_service_signal),
        STATS_DESC_COUNTER(VM, inject_virtio)
};
static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
                sizeof(struct kvm_vm_stat) / sizeof(u64));

const struct kvm_stats_header kvm_vm_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vm_stats_desc),
};

const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
        KVM_GENERIC_VCPU_STATS(),
        STATS_DESC_COUNTER(VCPU, exit_userspace),
        STATS_DESC_COUNTER(VCPU, exit_null),
        STATS_DESC_COUNTER(VCPU, exit_external_request),
        STATS_DESC_COUNTER(VCPU, exit_io_request),
        STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
        STATS_DESC_COUNTER(VCPU, exit_stop_request),
        STATS_DESC_COUNTER(VCPU, exit_validity),
        STATS_DESC_COUNTER(VCPU, exit_instruction),
        STATS_DESC_COUNTER(VCPU, exit_pei),
        STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
        STATS_DESC_COUNTER(VCPU, instruction_lctl),
        STATS_DESC_COUNTER(VCPU, instruction_lctlg),
        STATS_DESC_COUNTER(VCPU, instruction_stctl),
        STATS_DESC_COUNTER(VCPU, instruction_stctg),
        STATS_DESC_COUNTER(VCPU, exit_program_interruption),
        STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
        STATS_DESC_COUNTER(VCPU, exit_operation_exception),
        STATS_DESC_COUNTER(VCPU, deliver_ckc),
        STATS_DESC_COUNTER(VCPU, deliver_cputm),
        STATS_DESC_COUNTER(VCPU, deliver_external_call),
        STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
        STATS_DESC_COUNTER(VCPU, deliver_service_signal),
        STATS_DESC_COUNTER(VCPU, deliver_virtio),
        STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
        STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
        STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
        STATS_DESC_COUNTER(VCPU, deliver_program),
        STATS_DESC_COUNTER(VCPU, deliver_io),
        STATS_DESC_COUNTER(VCPU, deliver_machine_check),
        STATS_DESC_COUNTER(VCPU, exit_wait_state),
        STATS_DESC_COUNTER(VCPU, inject_ckc),
        STATS_DESC_COUNTER(VCPU, inject_cputm),
        STATS_DESC_COUNTER(VCPU, inject_external_call),
        STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
        STATS_DESC_COUNTER(VCPU, inject_mchk),
        STATS_DESC_COUNTER(VCPU, inject_pfault_init),
        STATS_DESC_COUNTER(VCPU, inject_program),
        STATS_DESC_COUNTER(VCPU, inject_restart),
        STATS_DESC_COUNTER(VCPU, inject_set_prefix),
        STATS_DESC_COUNTER(VCPU, inject_stop_signal),
        STATS_DESC_COUNTER(VCPU, instruction_epsw),
        STATS_DESC_COUNTER(VCPU, instruction_gs),
        STATS_DESC_COUNTER(VCPU, instruction_io_other),
        STATS_DESC_COUNTER(VCPU, instruction_lpsw),
        STATS_DESC_COUNTER(VCPU, instruction_lpswe),
        STATS_DESC_COUNTER(VCPU, instruction_pfmf),
        STATS_DESC_COUNTER(VCPU, instruction_ptff),
        STATS_DESC_COUNTER(VCPU, instruction_sck),
        STATS_DESC_COUNTER(VCPU, instruction_sckpf),
        STATS_DESC_COUNTER(VCPU, instruction_stidp),
        STATS_DESC_COUNTER(VCPU, instruction_spx),
        STATS_DESC_COUNTER(VCPU, instruction_stpx),
        STATS_DESC_COUNTER(VCPU, instruction_stap),
        STATS_DESC_COUNTER(VCPU, instruction_iske),
        STATS_DESC_COUNTER(VCPU, instruction_ri),
        STATS_DESC_COUNTER(VCPU, instruction_rrbe),
        STATS_DESC_COUNTER(VCPU, instruction_sske),
        STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
        STATS_DESC_COUNTER(VCPU, instruction_stsi),
        STATS_DESC_COUNTER(VCPU, instruction_stfl),
        STATS_DESC_COUNTER(VCPU, instruction_tb),
        STATS_DESC_COUNTER(VCPU, instruction_tpi),
        STATS_DESC_COUNTER(VCPU, instruction_tprot),
        STATS_DESC_COUNTER(VCPU, instruction_tsch),
        STATS_DESC_COUNTER(VCPU, instruction_sie),
        STATS_DESC_COUNTER(VCPU, instruction_essa),
        STATS_DESC_COUNTER(VCPU, instruction_sthyi),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
        STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_10),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_44),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c),
        STATS_DESC_COUNTER(VCPU, diag_9c_ignored),
        STATS_DESC_COUNTER(VCPU, diag_9c_forward),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_258),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_308),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_500),
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
        STATS_DESC_COUNTER(VCPU, pfault_sync)
};
static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
                sizeof(struct kvm_vcpu_stat) / sizeof(u64));

const struct kvm_stats_header kvm_vcpu_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
        .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
        .id_offset = sizeof(struct kvm_stats_header),
        .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
        .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
                       sizeof(kvm_vcpu_stats_desc),
};
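
/*
 * The two stats headers above describe the layout of the binary stats
 * file descriptor returned by the KVM_GET_STATS_FD ioctl: the header
 * itself, a NUL-padded id string of name_size bytes, num_desc stat
 * descriptors starting at desc_offset, and one u64 value per stat
 * starting at data_offset. A minimal userspace sketch of parsing that
 * layout (an added illustration, never compiled as part of this file;
 * dump_vm_stats() is a hypothetical helper):
 */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void dump_vm_stats(int vm_fd)
{
        struct kvm_stats_header header;
        int stats_fd = ioctl(vm_fd, KVM_GET_STATS_FD, NULL);

        if (stats_fd < 0)
                return;
        /* The header sits at offset 0 of the stats file. */
        if (pread(stats_fd, &header, sizeof(header), 0) == sizeof(header))
                printf("%u stat descriptors, values at offset %u\n",
                       header.num_desc, header.data_offset);
        close(stats_fd);
}
#endif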

/* allow nested virtualization in KVM (if enabled by user space) */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");

/* allow 1m huge page guest backing, if !nested */
static int hpage;
module_param(hpage, int, 0444);
MODULE_PARM_DESC(hpage, "1m huge page backing support");

/* maximum percentage of steal time for polling.  >100 is treated like 100 */
static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");

/* if set to true, the GISA will be initialized and used if available */
static bool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");

/* maximum diag9c forwarding per second */
unsigned int diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
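
/*
 * Added usage note: the parameters above can be set at module load
 * time, e.g. "modprobe kvm nested=1" (illustrative invocation), and
 * the ones registered with 0644 permissions can additionally be
 * changed at runtime through /sys/module/kvm/parameters/.
 */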
213
214 /*
215  * For now we handle at most 16 double words as this is what the s390 base
216  * kernel handles and stores in the prefix page. If we ever need to go beyond
217  * this, this requires changes to code, but the external uapi can stay.
218  */
219 #define SIZE_INTERNAL 16
220
221 /*
222  * Base feature mask that defines default mask for facilities. Consists of the
223  * defines in FACILITIES_KVM and the non-hypervisor managed bits.
224  */
225 static unsigned long kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM };
226 /*
227  * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL
228  * and defines the facilities that can be enabled via a cpu model.
229  */
230 static unsigned long kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
231
232 static unsigned long kvm_s390_fac_size(void)
233 {
234         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_MASK_SIZE_U64);
235         BUILD_BUG_ON(SIZE_INTERNAL > S390_ARCH_FAC_LIST_SIZE_U64);
236         BUILD_BUG_ON(SIZE_INTERNAL * sizeof(unsigned long) >
237                 sizeof(stfle_fac_list));
238
239         return SIZE_INTERNAL;
240 }
241
242 /* available cpu features supported by kvm */
243 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
244 /* available subfunctions indicated via query / "test bit" */
245 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
246
247 static struct gmap_notifier gmap_notifier;
248 static struct gmap_notifier vsie_gmap_notifier;
249 debug_info_t *kvm_s390_dbf;
250 debug_info_t *kvm_s390_dbf_uv;
251
252 /* Section: not file related */
253 int kvm_arch_hardware_enable(void)
254 {
255         /* every s390 is virtualization enabled ;-) */
256         return 0;
257 }
258
259 int kvm_arch_check_processor_compat(void *opaque)
260 {
261         return 0;
262 }
263
264 /* forward declarations */
265 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
266                               unsigned long end);
267 static int sca_switch_to_extended(struct kvm *kvm);
268
269 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
270 {
271         u8 delta_idx = 0;
272
273         /*
274          * The TOD jumps by delta, we have to compensate this by adding
275          * -delta to the epoch.
276          */
277         delta = -delta;
278
279         /* sign-extension - we're adding to signed values below */
280         if ((s64)delta < 0)
281                 delta_idx = -1;
282
283         scb->epoch += delta;
284         if (scb->ecd & ECD_MEF) {
285                 scb->epdx += delta_idx;
286                 if (scb->epoch < delta)
287                         scb->epdx += 1;
288         }
289 }
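
/*
 * Added worked example for the carry handling above: the guest epoch is
 * a signed quantity split into epoch (low 64 bits) and epdx (the upper
 * bits provided by the multi-epoch facility). If the TOD jumps
 * backwards by 1, the incoming delta is -1, so +1 is added to the
 * epoch, and delta_idx stays 0 because +1 is not negative. If the old
 * epoch was 0xffffffffffffffff, the addition wraps to 0; the unsigned
 * check scb->epoch < delta (0 < 1) then detects the carry, and epdx is
 * incremented to propagate it into the upper bits.
 */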

/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
                          void *v)
{
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int i;
        unsigned long long *delta = v;

        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        kvm_clock_sync_scb(vcpu->arch.sie_block, *delta);
                        if (i == 0) {
                                kvm->arch.epoch = vcpu->arch.sie_block->epoch;
                                kvm->arch.epdx = vcpu->arch.sie_block->epdx;
                        }
                        if (vcpu->arch.cputm_enabled)
                                vcpu->arch.cputm_start += *delta;
                        if (vcpu->arch.vsie_block)
                                kvm_clock_sync_scb(vcpu->arch.vsie_block,
                                                   *delta);
                }
        }
        return NOTIFY_OK;
}

static struct notifier_block kvm_clock_notifier = {
        .notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void *opaque)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_pte_notifier(&gmap_notifier);
        vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
        gmap_register_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_register(&s390_epoch_delta_notifier,
                                       &kvm_clock_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_pte_notifier(&gmap_notifier);
        gmap_unregister_pte_notifier(&vsie_gmap_notifier);
        atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
                                         &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
        set_bit_inv(nr, kvm_s390_available_cpu_feat);
}

static inline int plo_test_bit(unsigned char nr)
{
        unsigned long function = (unsigned long)nr | 0x100;
        int cc;

        asm volatile(
                "       lgr     0,%[function]\n"
                /* Parameter registers are ignored for "test bit" */
                "       plo     0,0,0,0(0)\n"
                "       ipm     %0\n"
                "       srl     %0,28\n"
                : "=d" (cc)
                : [function] "d" (function)
                : "cc", "0");
        return cc == 0;
}

static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
{
        asm volatile(
                "       lghi    0,0\n"
                "       lgr     1,%[query]\n"
                /* Parameter registers are ignored */
                "       .insn   rrf,%[opc] << 16,2,4,6,0\n"
                :
                : [query] "d" ((unsigned long)query), [opc] "i" (opcode)
                : "cc", "memory", "0", "1");
}

#define INSN_SORTL 0xb938
#define INSN_DFLTCC 0xb939

static void kvm_s390_cpu_feat_init(void)
{
        int i;

        for (i = 0; i < 256; ++i) {
                if (plo_test_bit(i))
                        kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
        }

        if (test_facility(28)) /* TOD-clock steering */
                ptff(kvm_s390_available_subfunc.ptff,
                     sizeof(kvm_s390_available_subfunc.ptff),
                     PTFF_QAF);

        if (test_facility(17)) { /* MSA */
                __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmac);
                __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmc);
                __cpacf_query(CPACF_KM, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.km);
                __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kimd);
                __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.klmd);
        }
        if (test_facility(76)) /* MSA3 */
                __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pckmo);
        if (test_facility(77)) { /* MSA4 */
                __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmctr);
                __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmf);
                __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kmo);
                __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.pcc);
        }
        if (test_facility(57)) /* MSA5 */
                __cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.ppno);

        if (test_facility(146)) /* MSA8 */
                __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kma);

        if (test_facility(155)) /* MSA9 */
                __cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
                              kvm_s390_available_subfunc.kdsa);

        if (test_facility(150)) /* SORTL */
                __insn32_query(INSN_SORTL, kvm_s390_available_subfunc.sortl);

        if (test_facility(151)) /* DFLTCC */
                __insn32_query(INSN_DFLTCC, kvm_s390_available_subfunc.dfltcc);

        if (MACHINE_HAS_ESOP)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
        /*
         * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
         * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
         */
        if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
            !test_facility(3) || !nested)
                return;
        allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
        if (sclp.has_64bscao)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
        if (sclp.has_siif)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
        if (sclp.has_gpere)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
        if (sclp.has_gsls)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
        if (sclp.has_ib)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
        if (sclp.has_cei)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
        if (sclp.has_ibs)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
        if (sclp.has_kss)
                allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
        /*
         * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
         * all skey handling functions read/set the skey from the PGSTE
         * instead of the real storage key.
         *
         * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
         * pages being detected as preserved although they are resident.
         *
         * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
         * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
         *
         * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
         * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
         * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
         *
         * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
         * cannot easily shadow the SCA because of the ipte lock.
         */
}

int kvm_arch_init(void *opaque)
{
        int rc = -ENOMEM;

        kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf)
                return -ENOMEM;

        kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
        if (!kvm_s390_dbf_uv)
                goto out;

        if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
            debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
                goto out;

        kvm_s390_cpu_feat_init();

        /* Register floating interrupt controller interface. */
        rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
        if (rc) {
                pr_err("A FLIC registration call failed with rc=%d\n", rc);
                goto out;
        }

        rc = kvm_s390_gib_init(GAL_ISC);
        if (rc)
                goto out;

        return 0;

out:
        kvm_arch_exit();
        return rc;
}

void kvm_arch_exit(void)
{
        kvm_s390_gib_destroy();
        debug_unregister(kvm_s390_dbf);
        debug_unregister(kvm_s390_dbf_uv);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_ASYNC_PF:
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
        case KVM_CAP_IMMEDIATE_EXIT:
        case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
        case KVM_CAP_S390_USER_INSTR0:
        case KVM_CAP_S390_CMMA_MIGRATION:
        case KVM_CAP_S390_AIS:
        case KVM_CAP_S390_AIS_MIGRATION:
        case KVM_CAP_S390_VCPU_RESETS:
        case KVM_CAP_SET_GUEST_DEBUG:
        case KVM_CAP_S390_DIAG318:
                r = 1;
                break;
        case KVM_CAP_SET_GUEST_DEBUG2:
                r = KVM_GUESTDBG_VALID_MASK;
                break;
        case KVM_CAP_S390_HPAGE_1M:
                r = 0;
                if (hpage && !kvm_is_ucontrol(kvm))
                        r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
                r = MEM_OP_MAX_SIZE;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
        case KVM_CAP_MAX_VCPU_ID:
                r = KVM_S390_BSCA_CPU_SLOTS;
                if (!kvm_s390_use_sca_entries())
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                r = MACHINE_HAS_VX;
                break;
        case KVM_CAP_S390_RI:
                r = test_facility(64);
                break;
        case KVM_CAP_S390_GS:
                r = test_facility(133);
                break;
        case KVM_CAP_S390_BPB:
                r = test_facility(82);
                break;
        case KVM_CAP_S390_PROTECTED:
                r = is_prot_virt_host();
                break;
        default:
                r = 0;
        }
        return r;
}
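
/*
 * Userspace probes the capabilities above with the KVM_CHECK_EXTENSION
 * ioctl on the VM file descriptor. A minimal sketch (an added
 * illustration, never compiled here; mem_op_max_size() is a
 * hypothetical helper):
 */
#if 0
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns the maximum KVM_S390_MEM_OP transfer size, or 0 if unsupported. */
static int mem_op_max_size(int vm_fd)
{
        return ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
}
#endif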

void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
        int i;
        gfn_t cur_gfn, last_gfn;
        unsigned long gaddr, vmaddr;
        struct gmap *gmap = kvm->arch.gmap;
        DECLARE_BITMAP(bitmap, _PAGE_ENTRIES);

        /* Loop over all guest segments */
        cur_gfn = memslot->base_gfn;
        last_gfn = memslot->base_gfn + memslot->npages;
        for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
                gaddr = gfn_to_gpa(cur_gfn);
                vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
                if (kvm_is_error_hva(vmaddr))
                        continue;

                bitmap_zero(bitmap, _PAGE_ENTRIES);
                gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr);
                for (i = 0; i < _PAGE_ENTRIES; i++) {
                        if (test_bit(i, bitmap))
                                mark_page_dirty(kvm, cur_gfn + i);
                }

                if (fatal_signal_pending(current))
                        return;
                cond_resched();
        }
}

/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        int r;
        unsigned long n;
        struct kvm_memory_slot *memslot;
        int is_dirty;

        if (kvm_is_ucontrol(kvm))
                return -EINVAL;

        mutex_lock(&kvm->slots_lock);

        r = -EINVAL;
        if (log->slot >= KVM_USER_MEM_SLOTS)
                goto out;

        r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
        if (r)
                goto out;

        /* Clear the dirty log */
        if (is_dirty) {
                n = kvm_dirty_bitmap_bytes(memslot);
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
out:
        mutex_unlock(&kvm->slots_lock);
        return r;
}

static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
        }
}

int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_IRQCHIP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                kvm->arch.use_irqchip = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_SIGP:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
        case KVM_CAP_S390_VECTOR_REGISTERS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (MACHINE_HAS_VX) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 129);
                        set_kvm_facility(kvm->arch.model.fac_list, 129);
                        if (test_facility(134)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 134);
                                set_kvm_facility(kvm->arch.model.fac_list, 134);
                        }
                        if (test_facility(135)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 135);
                                set_kvm_facility(kvm->arch.model.fac_list, 135);
                        }
                        if (test_facility(148)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 148);
                                set_kvm_facility(kvm->arch.model.fac_list, 148);
                        }
                        if (test_facility(152)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 152);
                                set_kvm_facility(kvm->arch.model.fac_list, 152);
                        }
                        if (test_facility(192)) {
                                set_kvm_facility(kvm->arch.model.fac_mask, 192);
                                set_kvm_facility(kvm->arch.model.fac_list, 192);
                        }
                        r = 0;
                } else
                        r = -EINVAL;
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_RI:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(64)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 64);
                        set_kvm_facility(kvm->arch.model.fac_list, 64);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_AIS:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else {
                        set_kvm_facility(kvm->arch.model.fac_mask, 72);
                        set_kvm_facility(kvm->arch.model.fac_list, 72);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: AIS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_GS:
                r = -EINVAL;
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus) {
                        r = -EBUSY;
                } else if (test_facility(133)) {
                        set_kvm_facility(kvm->arch.model.fac_mask, 133);
                        set_kvm_facility(kvm->arch.model.fac_list, 133);
                        r = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_HPAGE_1M:
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        r = -EBUSY;
                else if (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
                        r = -EINVAL;
                else {
                        r = 0;
                        mmap_write_lock(kvm->mm);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
                        mmap_write_unlock(kvm->mm);
                        /*
                         * We might have to create fake 4k page
                         * tables. To avoid that the hardware works on
                         * stale PGSTEs, we emulate these instructions.
                         */
                        kvm->arch.use_skf = 0;
                        kvm->arch.use_pfmfi = 0;
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
                         r ? "(not available)" : "(success)");
                break;
        case KVM_CAP_S390_USER_STSI:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
        case KVM_CAP_S390_USER_INSTR0:
                VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
                kvm->arch.user_instr0 = 1;
                icpt_operexc_on_all_vcpus(kvm);
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}
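
/*
 * Userspace opts in to the behaviors above with the KVM_ENABLE_CAP
 * ioctl on the VM file descriptor; cap->flags must be zero, and the
 * capabilities that extend the facility lists must be enabled before
 * the first VCPU is created. A minimal sketch (an added illustration,
 * never compiled here; enable_user_sigp() is a hypothetical helper):
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hand SIGP orders to userspace instead of handling them in KVM. */
static int enable_user_sigp(int vm_fd)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_S390_USER_SIGP;
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
#endif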

static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        switch (attr->attr) {
        case KVM_S390_VM_MEM_LIMIT_SIZE:
                ret = 0;
                VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
                         kvm->arch.mem_limit);
                if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
                        ret = -EFAULT;
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;
        unsigned int idx;
        switch (attr->attr) {
        case KVM_S390_VM_MEM_ENABLE_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                mutex_lock(&kvm->lock);
                if (kvm->created_vcpus)
                        ret = -EBUSY;
                else if (kvm->mm->context.allow_gmap_hpage_1m)
                        ret = -EINVAL;
                else {
                        kvm->arch.use_cmma = 1;
                        /* Not compatible with cmma. */
                        kvm->arch.use_pfmfi = 0;
                        ret = 0;
                }
                mutex_unlock(&kvm->lock);
                break;
        case KVM_S390_VM_MEM_CLR_CMMA:
                ret = -ENXIO;
                if (!sclp.has_cmma)
                        break;
                ret = -EINVAL;
                if (!kvm->arch.use_cmma)
                        break;

                VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                mutex_lock(&kvm->lock);
                idx = srcu_read_lock(&kvm->srcu);
                s390_reset_cmma(kvm->arch.gmap->mm);
                srcu_read_unlock(&kvm->srcu, idx);
                mutex_unlock(&kvm->lock);
                ret = 0;
                break;
        case KVM_S390_VM_MEM_LIMIT_SIZE: {
                unsigned long new_limit;

                if (kvm_is_ucontrol(kvm))
                        return -EINVAL;

                if (get_user(new_limit, (u64 __user *)attr->addr))
                        return -EFAULT;

                if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
                    new_limit > kvm->arch.mem_limit)
                        return -E2BIG;

                if (!new_limit)
                        return -EINVAL;

                /* gmap_create takes last usable address */
                if (new_limit != KVM_S390_NO_MEM_LIMIT)
                        new_limit -= 1;

                ret = -EBUSY;
                mutex_lock(&kvm->lock);
                if (!kvm->created_vcpus) {
                        /* gmap_create will round the limit up */
                        struct gmap *new = gmap_create(current->mm, new_limit);

                        if (!new) {
                                ret = -ENOMEM;
                        } else {
                                gmap_remove(kvm->arch.gmap);
                                new->private = kvm;
                                kvm->arch.gmap = new;
                                ret = 0;
                        }
                }
                mutex_unlock(&kvm->lock);
                VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
                VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
                         (void *) kvm->arch.gmap->asce);
                break;
        }
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

void kvm_s390_vcpu_crypto_reset_all(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        int i;

        kvm_s390_vcpu_block_all(kvm);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvm_s390_vcpu_crypto_setup(vcpu);
                /* recreate the shadow crycb by leaving the VSIE handler */
                kvm_s390_sync_request(KVM_REQ_VSIE_RESTART, vcpu);
        }

        kvm_s390_vcpu_unblock_all(kvm);
}

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
        mutex_lock(&kvm->lock);
        switch (attr->attr) {
        case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                kvm->arch.crypto.aes_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                get_random_bytes(
                        kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                kvm->arch.crypto.dea_kw = 1;
                VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.aes_kw = 0;
                memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                if (!test_kvm_facility(kvm, 76)) {
                        mutex_unlock(&kvm->lock);
                        return -EINVAL;
                }
                kvm->arch.crypto.dea_kw = 0;
                memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                break;
        case KVM_S390_VM_CRYPTO_ENABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 1;
                break;
        case KVM_S390_VM_CRYPTO_DISABLE_APIE:
                if (!ap_instructions_available()) {
                        mutex_unlock(&kvm->lock);
                        return -EOPNOTSUPP;
                }
                kvm->arch.crypto.apie = 0;
                break;
        default:
                mutex_unlock(&kvm->lock);
                return -ENXIO;
        }

        kvm_s390_vcpu_crypto_reset_all(kvm);
        mutex_unlock(&kvm->lock);
        return 0;
}

static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
{
        int cx;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(cx, vcpu, kvm)
                kvm_s390_sync_request(req, vcpu);
}

/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
        struct kvm_memory_slot *ms;
        struct kvm_memslots *slots;
        unsigned long ram_pages = 0;
        int slotnr;

        /* migration mode already enabled */
        if (kvm->arch.migration_mode)
                return 0;
        slots = kvm_memslots(kvm);
        if (!slots || !slots->used_slots)
                return -EINVAL;

        if (!kvm->arch.use_cmma) {
                kvm->arch.migration_mode = 1;
                return 0;
        }
        /* mark all the pages in active slots as dirty */
        for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
                ms = slots->memslots + slotnr;
                if (!ms->dirty_bitmap)
                        return -EINVAL;
                /*
                 * The second half of the bitmap is only used on x86,
                 * and would be wasted otherwise, so we put it to good
                 * use here to keep track of the state of the storage
                 * attributes.
                 */
                memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
                ram_pages += ms->npages;
        }
        atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
        kvm->arch.migration_mode = 1;
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
        return 0;
}

/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
        /* migration mode already disabled */
        if (!kvm->arch.migration_mode)
                return 0;
        kvm->arch.migration_mode = 0;
        if (kvm->arch.use_cmma)
                kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
        return 0;
}

static int kvm_s390_vm_set_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        int res = -ENXIO;

        mutex_lock(&kvm->slots_lock);
        switch (attr->attr) {
        case KVM_S390_VM_MIGRATION_START:
                res = kvm_s390_vm_start_migration(kvm);
                break;
        case KVM_S390_VM_MIGRATION_STOP:
                res = kvm_s390_vm_stop_migration(kvm);
                break;
        default:
                break;
        }
        mutex_unlock(&kvm->slots_lock);

        return res;
}

static int kvm_s390_vm_get_migration(struct kvm *kvm,
                                     struct kvm_device_attr *attr)
{
        u64 mig = kvm->arch.migration_mode;

        if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
                return -ENXIO;

        if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
                return -EFAULT;
        return 0;
}
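
/*
 * The migration attributes are driven from userspace through the VM
 * device attribute ioctls (see KVM_CAP_VM_ATTRIBUTES). A minimal
 * sketch of querying migration mode (an added illustration, never
 * compiled here; migration_mode_on() is a hypothetical helper; s390 is
 * 64-bit only, so the pointer fits the __u64 addr field):
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns 1 if migration mode is on, 0 if off, -1 on error. */
static int migration_mode_on(int vm_fd)
{
        uint64_t mig = 0;
        struct kvm_device_attr attr = {
                .group = KVM_S390_VM_MIGRATION,
                .attr  = KVM_S390_VM_MIGRATION_STATUS,
                .addr  = (uint64_t)&mig,
        };

        if (ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr))
                return -1;
        return mig != 0;
}
#endif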

static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;

        if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
                return -EINVAL;
        kvm_s390_set_tod_clock(kvm, &gtod);

        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);

        return 0;
}

static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high;

        if (copy_from_user(&gtod_high, (void __user *)attr->addr,
                                           sizeof(gtod_high)))
                return -EFAULT;

        if (gtod_high != 0)
                return -EINVAL;
        VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod = { 0 };

        if (copy_from_user(&gtod.tod, (void __user *)attr->addr,
                           sizeof(gtod.tod)))
                return -EFAULT;

        kvm_s390_set_tod_clock(kvm, &gtod);
        VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
        return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_set_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_set_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_set_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}

static void kvm_s390_get_tod_clock(struct kvm *kvm,
                                   struct kvm_s390_vm_tod_clock *gtod)
{
        union tod_clock clk;

        preempt_disable();

        store_tod_clock_ext(&clk);

        gtod->tod = clk.tod + kvm->arch.epoch;
        gtod->epoch_idx = 0;
        if (test_kvm_facility(kvm, 139)) {
                gtod->epoch_idx = clk.ei + kvm->arch.epdx;
                if (gtod->tod < clk.tod)
                        gtod->epoch_idx += 1;
        }

        preempt_enable();
}

static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
{
        struct kvm_s390_vm_tod_clock gtod;

        memset(&gtod, 0, sizeof(gtod));
        kvm_s390_get_tod_clock(kvm, &gtod);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;

        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
                gtod.epoch_idx, gtod.tod);
        return 0;
}

static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u8 gtod_high = 0;

        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

        return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
        u64 gtod;

        gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
        VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

        return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
        int ret;

        if (attr->flags)
                return -EINVAL;

        switch (attr->attr) {
        case KVM_S390_VM_TOD_EXT:
                ret = kvm_s390_get_tod_ext(kvm, attr);
                break;
        case KVM_S390_VM_TOD_HIGH:
                ret = kvm_s390_get_tod_high(kvm, attr);
                break;
        case KVM_S390_VM_TOD_LOW:
                ret = kvm_s390_get_tod_low(kvm, attr);
                break;
        default:
                ret = -ENXIO;
                break;
        }
        return ret;
}
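
/*
 * The guest TOD is likewise exposed as a VM device attribute group. A
 * minimal sketch of reading epoch index and TOD base in one call via
 * KVM_S390_VM_TOD_EXT (an added illustration, never compiled here;
 * get_guest_tod() is a hypothetical helper and assumes an s390 host
 * where <asm/kvm.h> defines struct kvm_s390_vm_tod_clock):
 */
#if 0
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_guest_tod(int vm_fd, struct kvm_s390_vm_tod_clock *gtod)
{
        struct kvm_device_attr attr = {
                .group = KVM_S390_VM_TOD,
                .attr  = KVM_S390_VM_TOD_EXT,
                .addr  = (uint64_t)gtod,
        };

        return ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
}
#endif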
1274
1275 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1276 {
1277         struct kvm_s390_vm_cpu_processor *proc;
1278         u16 lowest_ibc, unblocked_ibc;
1279         int ret = 0;
1280
1281         mutex_lock(&kvm->lock);
1282         if (kvm->created_vcpus) {
1283                 ret = -EBUSY;
1284                 goto out;
1285         }
1286         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1287         if (!proc) {
1288                 ret = -ENOMEM;
1289                 goto out;
1290         }
1291         if (!copy_from_user(proc, (void __user *)attr->addr,
1292                             sizeof(*proc))) {
1293                 kvm->arch.model.cpuid = proc->cpuid;
1294                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
1295                 unblocked_ibc = sclp.ibc & 0xfff;
1296                 if (lowest_ibc && proc->ibc) {
1297                         if (proc->ibc > unblocked_ibc)
1298                                 kvm->arch.model.ibc = unblocked_ibc;
1299                         else if (proc->ibc < lowest_ibc)
1300                                 kvm->arch.model.ibc = lowest_ibc;
1301                         else
1302                                 kvm->arch.model.ibc = proc->ibc;
1303                 }
1304                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
1305                        S390_ARCH_FAC_LIST_SIZE_BYTE);
1306                 VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1307                          kvm->arch.model.ibc,
1308                          kvm->arch.model.cpuid);
1309                 VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1310                          kvm->arch.model.fac_list[0],
1311                          kvm->arch.model.fac_list[1],
1312                          kvm->arch.model.fac_list[2]);
1313         } else
1314                 ret = -EFAULT;
1315         kfree(proc);
1316 out:
1317         mutex_unlock(&kvm->lock);
1318         return ret;
1319 }
1320
1321 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1322                                        struct kvm_device_attr *attr)
1323 {
1324         struct kvm_s390_vm_cpu_feat data;
1325
1326         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1327                 return -EFAULT;
1328         if (!bitmap_subset((unsigned long *) data.feat,
1329                            kvm_s390_available_cpu_feat,
1330                            KVM_S390_VM_CPU_FEAT_NR_BITS))
1331                 return -EINVAL;
1332
1333         mutex_lock(&kvm->lock);
1334         if (kvm->created_vcpus) {
1335                 mutex_unlock(&kvm->lock);
1336                 return -EBUSY;
1337         }
1338         bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1339                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1340         mutex_unlock(&kvm->lock);
1341         VM_EVENT(kvm, 3, "SET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1342                          data.feat[0],
1343                          data.feat[1],
1344                          data.feat[2]);
1345         return 0;
1346 }
1347
1348 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1349                                           struct kvm_device_attr *attr)
1350 {
1351         mutex_lock(&kvm->lock);
1352         if (kvm->created_vcpus) {
1353                 mutex_unlock(&kvm->lock);
1354                 return -EBUSY;
1355         }
1356
1357         if (copy_from_user(&kvm->arch.model.subfuncs, (void __user *)attr->addr,
1358                            sizeof(struct kvm_s390_vm_cpu_subfunc))) {
1359                 mutex_unlock(&kvm->lock);
1360                 return -EFAULT;
1361         }
1362         mutex_unlock(&kvm->lock);
1363
1364         VM_EVENT(kvm, 3, "SET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1365                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1366                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1367                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1368                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1369         VM_EVENT(kvm, 3, "SET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1370                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1371                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1372         VM_EVENT(kvm, 3, "SET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1373                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1374                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1375         VM_EVENT(kvm, 3, "SET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1376                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1377                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1378         VM_EVENT(kvm, 3, "SET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1379                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1380                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1381         VM_EVENT(kvm, 3, "SET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1382                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1383                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1384         VM_EVENT(kvm, 3, "SET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1385                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1386                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1387         VM_EVENT(kvm, 3, "SET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1388                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1389                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1390         VM_EVENT(kvm, 3, "SET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1391                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1392                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1393         VM_EVENT(kvm, 3, "SET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1394                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1395                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1396         VM_EVENT(kvm, 3, "SET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1397                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1398                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1399         VM_EVENT(kvm, 3, "SET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1400                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1401                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1402         VM_EVENT(kvm, 3, "SET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1403                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1404                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1405         VM_EVENT(kvm, 3, "SET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1406                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1407                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1408         VM_EVENT(kvm, 3, "SET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1409                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1410                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1411         VM_EVENT(kvm, 3, "SET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1412                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1413                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1414                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1415                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1416         VM_EVENT(kvm, 3, "SET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1417                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1418                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1419                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1420                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1421
1422         return 0;
1423 }
1424
1425 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1426 {
1427         int ret = -ENXIO;
1428
1429         switch (attr->attr) {
1430         case KVM_S390_VM_CPU_PROCESSOR:
1431                 ret = kvm_s390_set_processor(kvm, attr);
1432                 break;
1433         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1434                 ret = kvm_s390_set_processor_feat(kvm, attr);
1435                 break;
1436         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1437                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
1438                 break;
1439         }
1440         return ret;
1441 }
1442
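/*
 * Illustrative only, not part of this file: a minimal userspace sketch of
 * driving the setters above via KVM_SET_DEVICE_ATTR on the VM fd. It
 * assumes a vm_fd obtained from KVM_CREATE_VM and omits error handling;
 * note that the setters return -EBUSY once the first VCPU exists.
 *
 *	struct kvm_s390_vm_cpu_processor proc = {
 *		.cpuid = guest_cpuid,	// hypothetical, chosen by the VMM
 *		.ibc = guest_ibc,	// hypothetical, chosen by the VMM
 *		// (fac_list initialization omitted for brevity)
 *	};
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr = KVM_S390_VM_CPU_PROCESSOR,
 *		.addr = (__u64)(unsigned long)&proc,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
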
1443 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1444 {
1445         struct kvm_s390_vm_cpu_processor *proc;
1446         int ret = 0;
1447
1448         proc = kzalloc(sizeof(*proc), GFP_KERNEL_ACCOUNT);
1449         if (!proc) {
1450                 ret = -ENOMEM;
1451                 goto out;
1452         }
1453         proc->cpuid = kvm->arch.model.cpuid;
1454         proc->ibc = kvm->arch.model.ibc;
1455         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1456                S390_ARCH_FAC_LIST_SIZE_BYTE);
1457         VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1458                  kvm->arch.model.ibc,
1459                  kvm->arch.model.cpuid);
1460         VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1461                  kvm->arch.model.fac_list[0],
1462                  kvm->arch.model.fac_list[1],
1463                  kvm->arch.model.fac_list[2]);
1464         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1465                 ret = -EFAULT;
1466         kfree(proc);
1467 out:
1468         return ret;
1469 }
1470
1471 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1472 {
1473         struct kvm_s390_vm_cpu_machine *mach;
1474         int ret = 0;
1475
1476         mach = kzalloc(sizeof(*mach), GFP_KERNEL_ACCOUNT);
1477         if (!mach) {
1478                 ret = -ENOMEM;
1479                 goto out;
1480         }
1481         get_cpu_id((struct cpuid *) &mach->cpuid);
1482         mach->ibc = sclp.ibc;
1483         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1484                S390_ARCH_FAC_LIST_SIZE_BYTE);
1485         memcpy((unsigned long *)&mach->fac_list, stfle_fac_list,
1486                sizeof(stfle_fac_list));
1487         VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1488                  kvm->arch.model.ibc,
1489                  kvm->arch.model.cpuid);
1490         VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1491                  mach->fac_mask[0],
1492                  mach->fac_mask[1],
1493                  mach->fac_mask[2]);
1494         VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1495                  mach->fac_list[0],
1496                  mach->fac_list[1],
1497                  mach->fac_list[2]);
1498         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1499                 ret = -EFAULT;
1500         kfree(mach);
1501 out:
1502         return ret;
1503 }
1504
1505 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1506                                        struct kvm_device_attr *attr)
1507 {
1508         struct kvm_s390_vm_cpu_feat data;
1509
1510         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1511                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1512         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1513                 return -EFAULT;
1514         VM_EVENT(kvm, 3, "GET: guest feat: 0x%16.16llx.0x%16.16llx.0x%16.16llx",
1515                          data.feat[0],
1516                          data.feat[1],
1517                          data.feat[2]);
1518         return 0;
1519 }
1520
1521 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1522                                      struct kvm_device_attr *attr)
1523 {
1524         struct kvm_s390_vm_cpu_feat data;
1525
1526         bitmap_copy((unsigned long *) data.feat,
1527                     kvm_s390_available_cpu_feat,
1528                     KVM_S390_VM_CPU_FEAT_NR_BITS);
1529         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1530                 return -EFAULT;
1531         VM_EVENT(kvm, 3, "GET: host feat:  0x%16.16llx.0x%16.16llx.0x%16.16llx",
1532                          data.feat[0],
1533                          data.feat[1],
1534                          data.feat[2]);
1535         return 0;
1536 }
1537
1538 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1539                                           struct kvm_device_attr *attr)
1540 {
1541         if (copy_to_user((void __user *)attr->addr, &kvm->arch.model.subfuncs,
1542             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1543                 return -EFAULT;
1544
1545         VM_EVENT(kvm, 3, "GET: guest PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1546                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[0],
1547                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[1],
1548                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[2],
1549                  ((unsigned long *) &kvm->arch.model.subfuncs.plo)[3]);
1550         VM_EVENT(kvm, 3, "GET: guest PTFF   subfunc 0x%16.16lx.%16.16lx",
1551                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[0],
1552                  ((unsigned long *) &kvm->arch.model.subfuncs.ptff)[1]);
1553         VM_EVENT(kvm, 3, "GET: guest KMAC   subfunc 0x%16.16lx.%16.16lx",
1554                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[0],
1555                  ((unsigned long *) &kvm->arch.model.subfuncs.kmac)[1]);
1556         VM_EVENT(kvm, 3, "GET: guest KMC    subfunc 0x%16.16lx.%16.16lx",
1557                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[0],
1558                  ((unsigned long *) &kvm->arch.model.subfuncs.kmc)[1]);
1559         VM_EVENT(kvm, 3, "GET: guest KM     subfunc 0x%16.16lx.%16.16lx",
1560                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[0],
1561                  ((unsigned long *) &kvm->arch.model.subfuncs.km)[1]);
1562         VM_EVENT(kvm, 3, "GET: guest KIMD   subfunc 0x%16.16lx.%16.16lx",
1563                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[0],
1564                  ((unsigned long *) &kvm->arch.model.subfuncs.kimd)[1]);
1565         VM_EVENT(kvm, 3, "GET: guest KLMD   subfunc 0x%16.16lx.%16.16lx",
1566                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[0],
1567                  ((unsigned long *) &kvm->arch.model.subfuncs.klmd)[1]);
1568         VM_EVENT(kvm, 3, "GET: guest PCKMO  subfunc 0x%16.16lx.%16.16lx",
1569                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[0],
1570                  ((unsigned long *) &kvm->arch.model.subfuncs.pckmo)[1]);
1571         VM_EVENT(kvm, 3, "GET: guest KMCTR  subfunc 0x%16.16lx.%16.16lx",
1572                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[0],
1573                  ((unsigned long *) &kvm->arch.model.subfuncs.kmctr)[1]);
1574         VM_EVENT(kvm, 3, "GET: guest KMF    subfunc 0x%16.16lx.%16.16lx",
1575                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[0],
1576                  ((unsigned long *) &kvm->arch.model.subfuncs.kmf)[1]);
1577         VM_EVENT(kvm, 3, "GET: guest KMO    subfunc 0x%16.16lx.%16.16lx",
1578                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[0],
1579                  ((unsigned long *) &kvm->arch.model.subfuncs.kmo)[1]);
1580         VM_EVENT(kvm, 3, "GET: guest PCC    subfunc 0x%16.16lx.%16.16lx",
1581                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[0],
1582                  ((unsigned long *) &kvm->arch.model.subfuncs.pcc)[1]);
1583         VM_EVENT(kvm, 3, "GET: guest PPNO   subfunc 0x%16.16lx.%16.16lx",
1584                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[0],
1585                  ((unsigned long *) &kvm->arch.model.subfuncs.ppno)[1]);
1586         VM_EVENT(kvm, 3, "GET: guest KMA    subfunc 0x%16.16lx.%16.16lx",
1587                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[0],
1588                  ((unsigned long *) &kvm->arch.model.subfuncs.kma)[1]);
1589         VM_EVENT(kvm, 3, "GET: guest KDSA   subfunc 0x%16.16lx.%16.16lx",
1590                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[0],
1591                  ((unsigned long *) &kvm->arch.model.subfuncs.kdsa)[1]);
1592         VM_EVENT(kvm, 3, "GET: guest SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1593                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[0],
1594                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[1],
1595                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[2],
1596                  ((unsigned long *) &kvm->arch.model.subfuncs.sortl)[3]);
1597         VM_EVENT(kvm, 3, "GET: guest DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1598                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[0],
1599                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[1],
1600                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[2],
1601                  ((unsigned long *) &kvm->arch.model.subfuncs.dfltcc)[3]);
1602
1603         return 0;
1604 }
1605
1606 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1607                                         struct kvm_device_attr *attr)
1608 {
1609         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1610             sizeof(struct kvm_s390_vm_cpu_subfunc)))
1611                 return -EFAULT;
1612
1613         VM_EVENT(kvm, 3, "GET: host  PLO    subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1614                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[0],
1615                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[1],
1616                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[2],
1617                  ((unsigned long *) &kvm_s390_available_subfunc.plo)[3]);
1618         VM_EVENT(kvm, 3, "GET: host  PTFF   subfunc 0x%16.16lx.%16.16lx",
1619                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[0],
1620                  ((unsigned long *) &kvm_s390_available_subfunc.ptff)[1]);
1621         VM_EVENT(kvm, 3, "GET: host  KMAC   subfunc 0x%16.16lx.%16.16lx",
1622                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[0],
1623                  ((unsigned long *) &kvm_s390_available_subfunc.kmac)[1]);
1624         VM_EVENT(kvm, 3, "GET: host  KMC    subfunc 0x%16.16lx.%16.16lx",
1625                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[0],
1626                  ((unsigned long *) &kvm_s390_available_subfunc.kmc)[1]);
1627         VM_EVENT(kvm, 3, "GET: host  KM     subfunc 0x%16.16lx.%16.16lx",
1628                  ((unsigned long *) &kvm_s390_available_subfunc.km)[0],
1629                  ((unsigned long *) &kvm_s390_available_subfunc.km)[1]);
1630         VM_EVENT(kvm, 3, "GET: host  KIMD   subfunc 0x%16.16lx.%16.16lx",
1631                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[0],
1632                  ((unsigned long *) &kvm_s390_available_subfunc.kimd)[1]);
1633         VM_EVENT(kvm, 3, "GET: host  KLMD   subfunc 0x%16.16lx.%16.16lx",
1634                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[0],
1635                  ((unsigned long *) &kvm_s390_available_subfunc.klmd)[1]);
1636         VM_EVENT(kvm, 3, "GET: host  PCKMO  subfunc 0x%16.16lx.%16.16lx",
1637                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[0],
1638                  ((unsigned long *) &kvm_s390_available_subfunc.pckmo)[1]);
1639         VM_EVENT(kvm, 3, "GET: host  KMCTR  subfunc 0x%16.16lx.%16.16lx",
1640                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[0],
1641                  ((unsigned long *) &kvm_s390_available_subfunc.kmctr)[1]);
1642         VM_EVENT(kvm, 3, "GET: host  KMF    subfunc 0x%16.16lx.%16.16lx",
1643                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[0],
1644                  ((unsigned long *) &kvm_s390_available_subfunc.kmf)[1]);
1645         VM_EVENT(kvm, 3, "GET: host  KMO    subfunc 0x%16.16lx.%16.16lx",
1646                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[0],
1647                  ((unsigned long *) &kvm_s390_available_subfunc.kmo)[1]);
1648         VM_EVENT(kvm, 3, "GET: host  PCC    subfunc 0x%16.16lx.%16.16lx",
1649                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[0],
1650                  ((unsigned long *) &kvm_s390_available_subfunc.pcc)[1]);
1651         VM_EVENT(kvm, 3, "GET: host  PPNO   subfunc 0x%16.16lx.%16.16lx",
1652                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[0],
1653                  ((unsigned long *) &kvm_s390_available_subfunc.ppno)[1]);
1654         VM_EVENT(kvm, 3, "GET: host  KMA    subfunc 0x%16.16lx.%16.16lx",
1655                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[0],
1656                  ((unsigned long *) &kvm_s390_available_subfunc.kma)[1]);
1657         VM_EVENT(kvm, 3, "GET: host  KDSA   subfunc 0x%16.16lx.%16.16lx",
1658                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[0],
1659                  ((unsigned long *) &kvm_s390_available_subfunc.kdsa)[1]);
1660         VM_EVENT(kvm, 3, "GET: host  SORTL  subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1661                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[0],
1662                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[1],
1663                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[2],
1664                  ((unsigned long *) &kvm_s390_available_subfunc.sortl)[3]);
1665         VM_EVENT(kvm, 3, "GET: host  DFLTCC subfunc 0x%16.16lx.%16.16lx.%16.16lx.%16.16lx",
1666                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[0],
1667                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[1],
1668                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[2],
1669                  ((unsigned long *) &kvm_s390_available_subfunc.dfltcc)[3]);
1670
1671         return 0;
1672 }
1673
1674 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1675 {
1676         int ret = -ENXIO;
1677
1678         switch (attr->attr) {
1679         case KVM_S390_VM_CPU_PROCESSOR:
1680                 ret = kvm_s390_get_processor(kvm, attr);
1681                 break;
1682         case KVM_S390_VM_CPU_MACHINE:
1683                 ret = kvm_s390_get_machine(kvm, attr);
1684                 break;
1685         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1686                 ret = kvm_s390_get_processor_feat(kvm, attr);
1687                 break;
1688         case KVM_S390_VM_CPU_MACHINE_FEAT:
1689                 ret = kvm_s390_get_machine_feat(kvm, attr);
1690                 break;
1691         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1692                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
1693                 break;
1694         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1695                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
1696                 break;
1697         }
1698         return ret;
1699 }
1700
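/*
 * Illustrative only: the read side is symmetric. A hypothetical userspace
 * snippet querying the host machine model through the dispatcher above
 * (vm_fd from KVM_CREATE_VM, error handling omitted):
 *
 *	struct kvm_s390_vm_cpu_machine mach;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr = KVM_S390_VM_CPU_MACHINE,
 *		.addr = (__u64)(unsigned long)&mach,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 *	// mach.fac_mask and mach.fac_list now hold the host facility bits
 */
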
1701 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1702 {
1703         int ret;
1704
1705         switch (attr->group) {
1706         case KVM_S390_VM_MEM_CTRL:
1707                 ret = kvm_s390_set_mem_control(kvm, attr);
1708                 break;
1709         case KVM_S390_VM_TOD:
1710                 ret = kvm_s390_set_tod(kvm, attr);
1711                 break;
1712         case KVM_S390_VM_CPU_MODEL:
1713                 ret = kvm_s390_set_cpu_model(kvm, attr);
1714                 break;
1715         case KVM_S390_VM_CRYPTO:
1716                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1717                 break;
1718         case KVM_S390_VM_MIGRATION:
1719                 ret = kvm_s390_vm_set_migration(kvm, attr);
1720                 break;
1721         default:
1722                 ret = -ENXIO;
1723                 break;
1724         }
1725
1726         return ret;
1727 }
1728
1729 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1730 {
1731         int ret;
1732
1733         switch (attr->group) {
1734         case KVM_S390_VM_MEM_CTRL:
1735                 ret = kvm_s390_get_mem_control(kvm, attr);
1736                 break;
1737         case KVM_S390_VM_TOD:
1738                 ret = kvm_s390_get_tod(kvm, attr);
1739                 break;
1740         case KVM_S390_VM_CPU_MODEL:
1741                 ret = kvm_s390_get_cpu_model(kvm, attr);
1742                 break;
1743         case KVM_S390_VM_MIGRATION:
1744                 ret = kvm_s390_vm_get_migration(kvm, attr);
1745                 break;
1746         default:
1747                 ret = -ENXIO;
1748                 break;
1749         }
1750
1751         return ret;
1752 }
1753
1754 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1755 {
1756         int ret;
1757
1758         switch (attr->group) {
1759         case KVM_S390_VM_MEM_CTRL:
1760                 switch (attr->attr) {
1761                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1762                 case KVM_S390_VM_MEM_CLR_CMMA:
1763                         ret = sclp.has_cmma ? 0 : -ENXIO;
1764                         break;
1765                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1766                         ret = 0;
1767                         break;
1768                 default:
1769                         ret = -ENXIO;
1770                         break;
1771                 }
1772                 break;
1773         case KVM_S390_VM_TOD:
1774                 switch (attr->attr) {
1775                 case KVM_S390_VM_TOD_LOW:
1776                 case KVM_S390_VM_TOD_HIGH:
1777                         ret = 0;
1778                         break;
1779                 default:
1780                         ret = -ENXIO;
1781                         break;
1782                 }
1783                 break;
1784         case KVM_S390_VM_CPU_MODEL:
1785                 switch (attr->attr) {
1786                 case KVM_S390_VM_CPU_PROCESSOR:
1787                 case KVM_S390_VM_CPU_MACHINE:
1788                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1789                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1790                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1791                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1792                         ret = 0;
1793                         break;
1794                 default:
1795                         ret = -ENXIO;
1796                         break;
1797                 }
1798                 break;
1799         case KVM_S390_VM_CRYPTO:
1800                 switch (attr->attr) {
1801                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1802                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1803                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1804                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1805                         ret = 0;
1806                         break;
1807                 case KVM_S390_VM_CRYPTO_ENABLE_APIE:
1808                 case KVM_S390_VM_CRYPTO_DISABLE_APIE:
1809                         ret = ap_instructions_available() ? 0 : -ENXIO;
1810                         break;
1811                 default:
1812                         ret = -ENXIO;
1813                         break;
1814                 }
1815                 break;
1816         case KVM_S390_VM_MIGRATION:
1817                 ret = 0;
1818                 break;
1819         default:
1820                 ret = -ENXIO;
1821                 break;
1822         }
1823
1824         return ret;
1825 }
1826
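/*
 * Illustrative only: a hypothetical userspace probe using the
 * KVM_HAS_DEVICE_ATTR path above before relying on an attribute
 * (vm_fd from KVM_CREATE_VM):
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_CPU_MODEL,
 *		.attr = KVM_S390_VM_CPU_PROCESSOR_SUBFUNC,
 *	};
 *	if (ioctl(vm_fd, KVM_HAS_DEVICE_ATTR, &attr) == 0)
 *		// the attribute is supported; otherwise errno is ENXIO
 */
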
1827 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1828 {
1829         uint8_t *keys;
1830         uint64_t hva;
1831         int srcu_idx, i, r = 0;
1832
1833         if (args->flags != 0)
1834                 return -EINVAL;
1835
1836         /* Is this guest using storage keys? */
1837         if (!mm_uses_skeys(current->mm))
1838                 return KVM_S390_GET_SKEYS_NONE;
1839
1840         /* Enforce sane limit on memory allocation */
1841         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1842                 return -EINVAL;
1843
1844         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1845         if (!keys)
1846                 return -ENOMEM;
1847
1848         mmap_read_lock(current->mm);
1849         srcu_idx = srcu_read_lock(&kvm->srcu);
1850         for (i = 0; i < args->count; i++) {
1851                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1852                 if (kvm_is_error_hva(hva)) {
1853                         r = -EFAULT;
1854                         break;
1855                 }
1856
1857                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1858                 if (r)
1859                         break;
1860         }
1861         srcu_read_unlock(&kvm->srcu, srcu_idx);
1862         mmap_read_unlock(current->mm);
1863
1864         if (!r) {
1865                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1866                                  sizeof(uint8_t) * args->count);
1867                 if (r)
1868                         r = -EFAULT;
1869         }
1870
1871         kvfree(keys);
1872         return r;
1873 }
1874
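/*
 * Illustrative only: a hypothetical userspace read of guest storage keys
 * via the handler above (vm_fd from KVM_CREATE_VM, error handling
 * omitted). A return value of KVM_S390_GET_SKEYS_NONE means the guest is
 * not using storage keys and nothing was copied.
 *
 *	__u8 keys[128];
 *	struct kvm_s390_skeys args = {
 *		.start_gfn = 0,
 *		.count = sizeof(keys),
 *		.skeydata_addr = (__u64)(unsigned long)keys,
 *	};
 *	int ret = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
 */
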
1875 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1876 {
1877         uint8_t *keys;
1878         uint64_t hva;
1879         int srcu_idx, i, r = 0;
1880         bool unlocked;
1881
1882         if (args->flags != 0)
1883                 return -EINVAL;
1884
1885         /* Enforce sane limit on memory allocation */
1886         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1887                 return -EINVAL;
1888
1889         keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
1890         if (!keys)
1891                 return -ENOMEM;
1892
1893         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1894                            sizeof(uint8_t) * args->count);
1895         if (r) {
1896                 r = -EFAULT;
1897                 goto out;
1898         }
1899
1900         /* Enable storage key handling for the guest */
1901         r = s390_enable_skey();
1902         if (r)
1903                 goto out;
1904
1905         i = 0;
1906         mmap_read_lock(current->mm);
1907         srcu_idx = srcu_read_lock(&kvm->srcu);
1908         while (i < args->count) {
1909                 unlocked = false;
1910                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1911                 if (kvm_is_error_hva(hva)) {
1912                         r = -EFAULT;
1913                         break;
1914                 }
1915
1916                 /* Lowest order bit is reserved */
1917                 if (keys[i] & 0x01) {
1918                         r = -EINVAL;
1919                         break;
1920                 }
1921
1922                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1923                 if (r) {
1924                         r = fixup_user_fault(current->mm, hva,
1925                                              FAULT_FLAG_WRITE, &unlocked);
1926                         if (r)
1927                                 break;
1928                 }
1929                 if (!r)
1930                         i++;
1931         }
1932         srcu_read_unlock(&kvm->srcu, srcu_idx);
1933         mmap_read_unlock(current->mm);
1934 out:
1935         kvfree(keys);
1936         return r;
1937 }
1938
1939 /*
1940  * Base address and length must be sent at the start of each block, so it
1941  * is cheaper to also send short runs of clean data, as long as a run is
1942  * smaller than the size of two longs (the cost of starting a new block).
1943  */
1944 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1945 /* use the same sane limit as for the storage keys, for consistency */
1946 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1947
1948 /*
1949  * Similar to gfn_to_memslot, but returns the index of a memslot also when the
1950  * address falls in a hole. In that case the index of one of the memslots
1951  * bordering the hole is returned.
1952  */
1953 static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
1954 {
1955         int start = 0, end = slots->used_slots;
1956         int slot = atomic_read(&slots->lru_slot);
1957         struct kvm_memory_slot *memslots = slots->memslots;
1958
1959         if (gfn >= memslots[slot].base_gfn &&
1960             gfn < memslots[slot].base_gfn + memslots[slot].npages)
1961                 return slot;
1962
1963         while (start < end) {
1964                 slot = start + (end - start) / 2;
1965
1966                 if (gfn >= memslots[slot].base_gfn)
1967                         end = slot;
1968                 else
1969                         start = slot + 1;
1970         }
1971
1972         if (start >= slots->used_slots)
1973                 return slots->used_slots - 1;
1974
1975         if (gfn >= memslots[start].base_gfn &&
1976             gfn < memslots[start].base_gfn + memslots[start].npages) {
1977                 atomic_set(&slots->lru_slot, start);
1978         }
1979
1980         return start;
1981 }
1982
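/*
 * "Peek" variant of the CMMA read: copy the CMMA values of bufsize
 * consecutive pages starting at args->start_gfn, without looking at or
 * consuming the dirty bitmap, i.e. a pure snapshot of the current state.
 */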
1983 static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
1984                               u8 *res, unsigned long bufsize)
1985 {
1986         unsigned long pgstev, hva, cur_gfn = args->start_gfn;
1987
1988         args->count = 0;
1989         while (args->count < bufsize) {
1990                 hva = gfn_to_hva(kvm, cur_gfn);
1991                 /*
1992                  * We return an error if the first value was invalid, but we
1993                  * return successfully if at least one value was copied.
1994                  */
1995                 if (kvm_is_error_hva(hva))
1996                         return args->count ? 0 : -EFAULT;
1997                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
1998                         pgstev = 0;
1999                 res[args->count++] = (pgstev >> 24) & 0x43;
2000                 cur_gfn++;
2001         }
2002
2003         return 0;
2004 }
2005
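/*
 * Find the guest frame number of the next page with the CMMA dirty bit
 * set, at or after cur_gfn. The memslots array is sorted by descending
 * base_gfn, so the scan moves towards lower slot indices, i.e. towards
 * higher guest addresses; if no dirty bit is left, the returned value
 * points past the end of the last memslot scanned.
 */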
2006 static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
2007                                               unsigned long cur_gfn)
2008 {
2009         int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
2010         struct kvm_memory_slot *ms = slots->memslots + slotidx;
2011         unsigned long ofs = cur_gfn - ms->base_gfn;
2012
2013         if (ms->base_gfn + ms->npages <= cur_gfn) {
2014                 slotidx--;
2015                 /* If we are above the highest slot, wrap around */
2016                 if (slotidx < 0)
2017                         slotidx = slots->used_slots - 1;
2018
2019                 ms = slots->memslots + slotidx;
2020                 ofs = 0;
2021         }
2022         ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
2023         while ((slotidx > 0) && (ofs >= ms->npages)) {
2024                 slotidx--;
2025                 ms = slots->memslots + slotidx;
2026                 ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
2027         }
2028         return ms->base_gfn + ofs;
2029 }
2030
2031 static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
2032                              u8 *res, unsigned long bufsize)
2033 {
2034         unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
2035         struct kvm_memslots *slots = kvm_memslots(kvm);
2036         struct kvm_memory_slot *ms;
2037
2038         if (unlikely(!slots->used_slots))
2039                 return 0;
2040
2041         cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
2042         ms = gfn_to_memslot(kvm, cur_gfn);
2043         args->count = 0;
2044         args->start_gfn = cur_gfn;
2045         if (!ms)
2046                 return 0;
2047         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2048         mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
2049
2050         while (args->count < bufsize) {
2051                 hva = gfn_to_hva(kvm, cur_gfn);
2052                 if (kvm_is_error_hva(hva))
2053                         return 0;
2054                 /* Decrement only if we actually flipped the bit to 0 */
2055                 if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
2056                         atomic64_dec(&kvm->arch.cmma_dirty_pages);
2057                 if (get_pgste(kvm->mm, hva, &pgstev) < 0)
2058                         pgstev = 0;
2059                 /* Save the value */
2060                 res[args->count++] = (pgstev >> 24) & 0x43;
2061                 /* If the next bit is too far away, stop. */
2062                 if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE)
2063                         return 0;
2064                 /* If we reached the previous "next", find the next one */
2065                 if (cur_gfn == next_gfn)
2066                         next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
2067                 /* Reached the end of memory or of the buffer, stop */
2068                 if ((next_gfn >= mem_end) ||
2069                     (next_gfn - args->start_gfn >= bufsize))
2070                         return 0;
2071                 cur_gfn++;
2072                 /* Reached the end of the current memslot, take the next one. */
2073                 if (cur_gfn - ms->base_gfn >= ms->npages) {
2074                         ms = gfn_to_memslot(kvm, cur_gfn);
2075                         if (!ms)
2076                                 return 0;
2077                 }
2078         }
2079         return 0;
2080 }
2081
2082 /*
2083  * This function searches for the next page with dirty CMMA attributes, and
2084  * saves the attributes in the buffer up to either the end of the buffer or
2085  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
2086  * no trailing clean bytes are saved.
2087  * If no dirty bits were found, or if CMMA was not enabled or used, the
2088  * output buffer will indicate a length of 0.
2089  */
2090 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
2091                                   struct kvm_s390_cmma_log *args)
2092 {
2093         unsigned long bufsize;
2094         int srcu_idx, peek, ret;
2095         u8 *values;
2096
2097         if (!kvm->arch.use_cmma)
2098                 return -ENXIO;
2099         /* Invalid/unsupported flags were specified */
2100         if (args->flags & ~KVM_S390_CMMA_PEEK)
2101                 return -EINVAL;
2102         /* Reading actual (non-peek) CMMA values requires migration mode */
2103         peek = !!(args->flags & KVM_S390_CMMA_PEEK);
2104         if (!peek && !kvm->arch.migration_mode)
2105                 return -EINVAL;
2106         /* CMMA is disabled or was not used, or the buffer has length zero */
2107         bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
2108         if (!bufsize || !kvm->mm->context.uses_cmm) {
2109                 memset(args, 0, sizeof(*args));
2110                 return 0;
2111         }
2112         /* We are not peeking, and there are no dirty pages */
2113         if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
2114                 memset(args, 0, sizeof(*args));
2115                 return 0;
2116         }
2117
2118         values = vmalloc(bufsize);
2119         if (!values)
2120                 return -ENOMEM;
2121
2122         mmap_read_lock(kvm->mm);
2123         srcu_idx = srcu_read_lock(&kvm->srcu);
2124         if (peek)
2125                 ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
2126         else
2127                 ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
2128         srcu_read_unlock(&kvm->srcu, srcu_idx);
2129         mmap_read_unlock(kvm->mm);
2130
2131         if (kvm->arch.migration_mode)
2132                 args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
2133         else
2134                 args->remaining = 0;
2135
2136         if (copy_to_user((void __user *)args->values, values, args->count))
2137                 ret = -EFAULT;
2138
2139         vfree(values);
2140         return ret;
2141 }
2142
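/*
 * Illustrative only: a hypothetical userspace loop draining the dirty
 * CMMA values via KVM_S390_GET_CMMA_BITS during migration (vm_fd from
 * KVM_CREATE_VM, migration mode already enabled through the
 * KVM_S390_VM_MIGRATION attribute, error handling omitted):
 *
 *	__u8 buf[4096];
 *	struct kvm_s390_cmma_log log = {
 *		.start_gfn = 0,
 *		.flags = 0,
 *		.values = (__u64)(unsigned long)buf,
 *	};
 *	do {
 *		log.count = sizeof(buf);
 *		ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
 *		// transfer log.count values starting at log.start_gfn
 *		log.start_gfn += log.count;
 *	} while (log.remaining);
 */
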
2143 /*
2144  * This function sets the CMMA attributes for the given pages. If the input
2145  * buffer has zero length, no action is taken, otherwise the attributes are
2146  * set and the mm->context.uses_cmm flag is set.
2147  */
2148 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
2149                                   const struct kvm_s390_cmma_log *args)
2150 {
2151         unsigned long hva, mask, pgstev, i;
2152         uint8_t *bits;
2153         int srcu_idx, r = 0;
2154
2155         mask = args->mask;
2156
2157         if (!kvm->arch.use_cmma)
2158                 return -ENXIO;
2159         /* invalid/unsupported flags */
2160         if (args->flags != 0)
2161                 return -EINVAL;
2162         /* Enforce sane limit on memory allocation */
2163         if (args->count > KVM_S390_CMMA_SIZE_MAX)
2164                 return -EINVAL;
2165         /* Nothing to do */
2166         if (args->count == 0)
2167                 return 0;
2168
2169         bits = vmalloc(array_size(sizeof(*bits), args->count));
2170         if (!bits)
2171                 return -ENOMEM;
2172
2173         r = copy_from_user(bits, (void __user *)args->values, args->count);
2174         if (r) {
2175                 r = -EFAULT;
2176                 goto out;
2177         }
2178
2179         mmap_read_lock(kvm->mm);
2180         srcu_idx = srcu_read_lock(&kvm->srcu);
2181         for (i = 0; i < args->count; i++) {
2182                 hva = gfn_to_hva(kvm, args->start_gfn + i);
2183                 if (kvm_is_error_hva(hva)) {
2184                         r = -EFAULT;
2185                         break;
2186                 }
2187
2188                 pgstev = bits[i];
2189                 pgstev = pgstev << 24;
2190                 mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
2191                 set_pgste_bits(kvm->mm, hva, mask, pgstev);
2192         }
2193         srcu_read_unlock(&kvm->srcu, srcu_idx);
2194         mmap_read_unlock(kvm->mm);
2195
2196         if (!kvm->mm->context.uses_cmm) {
2197                 mmap_write_lock(kvm->mm);
2198                 kvm->mm->context.uses_cmm = 1;
2199                 mmap_write_unlock(kvm->mm);
2200         }
2201 out:
2202         vfree(bits);
2203         return r;
2204 }
2205
2206 static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
2207 {
2208         struct kvm_vcpu *vcpu;
2209         u16 rc, rrc;
2210         int ret = 0;
2211         int i;
2212
2213         /*
2214          * We ignore failures and try to destroy as many CPUs as possible.
2215          * At the same time we must not free the assigned resources when
2216          * this fails, as the ultravisor still has access to that memory.
2217          * So kvm_s390_pv_destroy_cpu can leave an intentional ("wanted")
2218          * memory leak behind.
2219          * We do, however, want to return the rc and rrc of the first failure.
2220          */
2221         kvm_for_each_vcpu(i, vcpu, kvm) {
2222                 mutex_lock(&vcpu->mutex);
2223                 if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
2224                         *rcp = rc;
2225                         *rrcp = rrc;
2226                         ret = -EIO;
2227                 }
2228                 mutex_unlock(&vcpu->mutex);
2229         }
2230         return ret;
2231 }
2232
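/*
 * Convert all VCPUs of this guest to protected (secure) CPUs. If creating
 * any one of them fails, roll back by destroying the CPUs converted so
 * far, leaving the VM in a consistent non-protected state.
 */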
2233 static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
2234 {
2235         int i, r = 0;
2236         u16 dummy;
2237
2238         struct kvm_vcpu *vcpu;
2239
2240         kvm_for_each_vcpu(i, vcpu, kvm) {
2241                 mutex_lock(&vcpu->mutex);
2242                 r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
2243                 mutex_unlock(&vcpu->mutex);
2244                 if (r)
2245                         break;
2246         }
2247         if (r)
2248                 kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
2249         return r;
2250 }
2251
2252 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
2253 {
2254         int r = 0;
2255         u16 dummy;
2256         void __user *argp = (void __user *)cmd->data;
2257
2258         switch (cmd->cmd) {
2259         case KVM_PV_ENABLE: {
2260                 r = -EINVAL;
2261                 if (kvm_s390_pv_is_protected(kvm))
2262                         break;
2263
2264                 /*
2265                  *  FMT 4 SIE needs the ESCA. As we never switch back from ESCA
2266                  *  to BSCA, no cleanup is needed in the error cases below.
2267                  */
2268                 r = sca_switch_to_extended(kvm);
2269                 if (r)
2270                         break;
2271
2272                 mmap_write_lock(current->mm);
2273                 r = gmap_mark_unmergeable();
2274                 mmap_write_unlock(current->mm);
2275                 if (r)
2276                         break;
2277
2278                 r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
2279                 if (r)
2280                         break;
2281
2282                 r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
2283                 if (r)
2284                         kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
2285
2286                 /* we need to block service interrupts from now on */
2287                 set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2288                 break;
2289         }
2290         case KVM_PV_DISABLE: {
2291                 r = -EINVAL;
2292                 if (!kvm_s390_pv_is_protected(kvm))
2293                         break;
2294
2295                 r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
2296                 /*
2297                  * If a CPU could not be destroyed, destroying the VM will also
2298                  * fail, so there is no point in trying. Instead, return the rc
2299                  * and rrc from the first CPU whose destruction failed.
2300                  */
2301                 if (r)
2302                         break;
2303                 r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
2304
2305                 /* no need to block service interrupts any more */
2306                 clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
2307                 break;
2308         }
2309         case KVM_PV_SET_SEC_PARMS: {
2310                 struct kvm_s390_pv_sec_parm parms = {};
2311                 void *hdr;
2312
2313                 r = -EINVAL;
2314                 if (!kvm_s390_pv_is_protected(kvm))
2315                         break;
2316
2317                 r = -EFAULT;
2318                 if (copy_from_user(&parms, argp, sizeof(parms)))
2319                         break;
2320
2321                 /* Currently restricted to 8KB (PAGE_SIZE * 2) */
2322                 r = -EINVAL;
2323                 if (parms.length > PAGE_SIZE * 2)
2324                         break;
2325
2326                 r = -ENOMEM;
2327                 hdr = vmalloc(parms.length);
2328                 if (!hdr)
2329                         break;
2330
2331                 r = -EFAULT;
2332                 if (!copy_from_user(hdr, (void __user *)parms.origin,
2333                                     parms.length))
2334                         r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
2335                                                       &cmd->rc, &cmd->rrc);
2336
2337                 vfree(hdr);
2338                 break;
2339         }
2340         case KVM_PV_UNPACK: {
2341                 struct kvm_s390_pv_unp unp = {};
2342
2343                 r = -EINVAL;
2344                 if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
2345                         break;
2346
2347                 r = -EFAULT;
2348                 if (copy_from_user(&unp, argp, sizeof(unp)))
2349                         break;
2350
2351                 r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
2352                                        &cmd->rc, &cmd->rrc);
2353                 break;
2354         }
2355         case KVM_PV_VERIFY: {
2356                 r = -EINVAL;
2357                 if (!kvm_s390_pv_is_protected(kvm))
2358                         break;
2359
2360                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2361                                   UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
2362                 KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
2363                              cmd->rrc);
2364                 break;
2365         }
2366         case KVM_PV_PREP_RESET: {
2367                 r = -EINVAL;
2368                 if (!kvm_s390_pv_is_protected(kvm))
2369                         break;
2370
2371                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2372                                   UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
2373                 KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
2374                              cmd->rc, cmd->rrc);
2375                 break;
2376         }
2377         case KVM_PV_UNSHARE_ALL: {
2378                 r = -EINVAL;
2379                 if (!kvm_s390_pv_is_protected(kvm))
2380                         break;
2381
2382                 r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
2383                                   UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
2384                 KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
2385                              cmd->rc, cmd->rrc);
2386                 break;
2387         }
2388         default:
2389                 r = -ENOTTY;
2390         }
2391         return r;
2392 }
2393
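/*
 * Illustrative only: the typical userspace sequence for converting a VM
 * into a protected VM using the handler above (hypothetical snippet,
 * vm_fd from KVM_CREATE_VM, parms/unp filled in by the VMM, error
 * handling omitted). On failure, cmd.rc and cmd.rrc carry the ultravisor
 * return codes.
 *
 *	struct kvm_pv_cmd cmd = { .cmd = KVM_PV_ENABLE };
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);  // create the secure VM
 *	cmd.cmd = KVM_PV_SET_SEC_PARMS;
 *	cmd.data = (__u64)(unsigned long)&parms;  // SE header
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *	cmd.cmd = KVM_PV_UNPACK;                  // then unpack the image
 *	cmd.data = (__u64)(unsigned long)&unp;
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 *	cmd.cmd = KVM_PV_VERIFY;                  // and verify it
 *	cmd.data = 0;
 *	ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
 */
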
2394 long kvm_arch_vm_ioctl(struct file *filp,
2395                        unsigned int ioctl, unsigned long arg)
2396 {
2397         struct kvm *kvm = filp->private_data;
2398         void __user *argp = (void __user *)arg;
2399         struct kvm_device_attr attr;
2400         int r;
2401
2402         switch (ioctl) {
2403         case KVM_S390_INTERRUPT: {
2404                 struct kvm_s390_interrupt s390int;
2405
2406                 r = -EFAULT;
2407                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2408                         break;
2409                 r = kvm_s390_inject_vm(kvm, &s390int);
2410                 break;
2411         }
2412         case KVM_CREATE_IRQCHIP: {
2413                 struct kvm_irq_routing_entry routing;
2414
2415                 r = -EINVAL;
2416                 if (kvm->arch.use_irqchip) {
2417                         /* Set up dummy routing. */
2418                         memset(&routing, 0, sizeof(routing));
2419                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
2420                 }
2421                 break;
2422         }
2423         case KVM_SET_DEVICE_ATTR: {
2424                 r = -EFAULT;
2425                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2426                         break;
2427                 r = kvm_s390_vm_set_attr(kvm, &attr);
2428                 break;
2429         }
2430         case KVM_GET_DEVICE_ATTR: {
2431                 r = -EFAULT;
2432                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2433                         break;
2434                 r = kvm_s390_vm_get_attr(kvm, &attr);
2435                 break;
2436         }
2437         case KVM_HAS_DEVICE_ATTR: {
2438                 r = -EFAULT;
2439                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
2440                         break;
2441                 r = kvm_s390_vm_has_attr(kvm, &attr);
2442                 break;
2443         }
2444         case KVM_S390_GET_SKEYS: {
2445                 struct kvm_s390_skeys args;
2446
2447                 r = -EFAULT;
2448                 if (copy_from_user(&args, argp,
2449                                    sizeof(struct kvm_s390_skeys)))
2450                         break;
2451                 r = kvm_s390_get_skeys(kvm, &args);
2452                 break;
2453         }
2454         case KVM_S390_SET_SKEYS: {
2455                 struct kvm_s390_skeys args;
2456
2457                 r = -EFAULT;
2458                 if (copy_from_user(&args, argp,
2459                                    sizeof(struct kvm_s390_skeys)))
2460                         break;
2461                 r = kvm_s390_set_skeys(kvm, &args);
2462                 break;
2463         }
2464         case KVM_S390_GET_CMMA_BITS: {
2465                 struct kvm_s390_cmma_log args;
2466
2467                 r = -EFAULT;
2468                 if (copy_from_user(&args, argp, sizeof(args)))
2469                         break;
2470                 mutex_lock(&kvm->slots_lock);
2471                 r = kvm_s390_get_cmma_bits(kvm, &args);
2472                 mutex_unlock(&kvm->slots_lock);
2473                 if (!r) {
2474                         r = copy_to_user(argp, &args, sizeof(args));
2475                         if (r)
2476                                 r = -EFAULT;
2477                 }
2478                 break;
2479         }
2480         case KVM_S390_SET_CMMA_BITS: {
2481                 struct kvm_s390_cmma_log args;
2482
2483                 r = -EFAULT;
2484                 if (copy_from_user(&args, argp, sizeof(args)))
2485                         break;
2486                 mutex_lock(&kvm->slots_lock);
2487                 r = kvm_s390_set_cmma_bits(kvm, &args);
2488                 mutex_unlock(&kvm->slots_lock);
2489                 break;
2490         }
2491         case KVM_S390_PV_COMMAND: {
2492                 struct kvm_pv_cmd args;
2493
2494                 /* protected virtualization implies user-controlled SIGP (CPU state) */
2495                 kvm->arch.user_cpu_state_ctrl = 1;
2496                 r = 0;
2497                 if (!is_prot_virt_host()) {
2498                         r = -EINVAL;
2499                         break;
2500                 }
2501                 if (copy_from_user(&args, argp, sizeof(args))) {
2502                         r = -EFAULT;
2503                         break;
2504                 }
2505                 if (args.flags) {
2506                         r = -EINVAL;
2507                         break;
2508                 }
2509                 mutex_lock(&kvm->lock);
2510                 r = kvm_s390_handle_pv(kvm, &args);
2511                 mutex_unlock(&kvm->lock);
2512                 if (copy_to_user(argp, &args, sizeof(args))) {
2513                         r = -EFAULT;
2514                         break;
2515                 }
2516                 break;
2517         }
2518         default:
2519                 r = -ENOTTY;
2520         }
2521
2522         return r;
2523 }
2524
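/*
 * Report whether the AP eXtended Addressing (APXA) facility is installed,
 * as indicated by the AP Query Configuration Information (QCI) function.
 * Returns 0 if AP instructions are unavailable or the query fails.
 */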
2525 static int kvm_s390_apxa_installed(void)
2526 {
2527         struct ap_config_info info;
2528
2529         if (ap_instructions_available()) {
2530                 if (ap_qci(&info) == 0)
2531                         return info.apxa;
2532         }
2533
2534         return 0;
2535 }
2536
2537 /*
2538  * The format of the crypto control block (CRYCB) is specified in the 3 low
2539  * order bits of the CRYCB designation (CRYCBD) field as follows:
2540  * Format 0: Neither the message security assist extension 3 (MSAX3) nor the
2541  *           AP extended addressing (APXA) facility is installed.
2542  * Format 1: The APXA facility is not installed but the MSAX3 facility is.
2543  * Format 2: Both the APXA and MSAX3 facilities are installed.
2544  */
2545 static void kvm_s390_set_crycb_format(struct kvm *kvm)
2546 {
2547         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
2548
2549         /* Clear the CRYCB format bits - i.e., set format 0 by default */
2550         kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);
2551
2552         /* Check whether MSAX3 is installed */
2553         if (!test_kvm_facility(kvm, 76))
2554                 return;
2555
2556         if (kvm_s390_apxa_installed())
2557                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
2558         else
2559                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
2560 }
2561
2562 /*
2563  * kvm_arch_crypto_set_masks
2564  *
2565  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2566  *       to be set.
2567  * @apm: the mask identifying the accessible AP adapters
2568  * @aqm: the mask identifying the accessible AP domains
2569  * @adm: the mask identifying the accessible AP control domains
2570  *
2571  * Set the masks that identify the adapters, domains and control domains to
2572  * which the KVM guest is granted access.
2573  *
2574  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2575  *       function.
2576  */
2577 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
2578                                unsigned long *aqm, unsigned long *adm)
2579 {
2580         struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
2581
2582         kvm_s390_vcpu_block_all(kvm);
2583
2584         switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
2585         case CRYCB_FORMAT2: /* APCB1 masks are 256 bits each */
2586                 memcpy(crycb->apcb1.apm, apm, 32);
2587                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx %016lx %016lx %016lx",
2588                          apm[0], apm[1], apm[2], apm[3]);
2589                 memcpy(crycb->apcb1.aqm, aqm, 32);
2590                 VM_EVENT(kvm, 3, "SET CRYCB: aqm %016lx %016lx %016lx %016lx",
2591                          aqm[0], aqm[1], aqm[2], aqm[3]);
2592                 memcpy(crycb->apcb1.adm, adm, 32);
2593                 VM_EVENT(kvm, 3, "SET CRYCB: adm %016lx %016lx %016lx %016lx",
2594                          adm[0], adm[1], adm[2], adm[3]);
2595                 break;
2596         case CRYCB_FORMAT1:
2597         case CRYCB_FORMAT0: /* fall through - both formats use APCB0 */
2598                 memcpy(crycb->apcb0.apm, apm, 8);
2599                 memcpy(crycb->apcb0.aqm, aqm, 2);
2600                 memcpy(crycb->apcb0.adm, adm, 2);
2601                 VM_EVENT(kvm, 3, "SET CRYCB: apm %016lx aqm %04x adm %04x",
2602                          apm[0], *((unsigned short *)aqm),
2603                          *((unsigned short *)adm));
2604                 break;
2605         default:        /* Cannot happen */
2606                 break;
2607         }
2608
2609         /* recreate the shadow crycb for each vcpu */
2610         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2611         kvm_s390_vcpu_unblock_all(kvm);
2612 }
2613 EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
2614
2615 /*
2616  * kvm_arch_crypto_clear_masks
2617  *
2618  * @kvm: pointer to the target guest's KVM struct containing the crypto masks
2619  *       to be cleared.
2620  *
2621  * Clear the masks that identify the adapters, domains and control domains to
2622  * which the KVM guest is granted access.
2623  *
2624  * Note: The kvm->lock mutex must be locked by the caller before invoking this
2625  *       function.
2626  */
2627 void kvm_arch_crypto_clear_masks(struct kvm *kvm)
2628 {
2629         kvm_s390_vcpu_block_all(kvm);
2630
2631         memset(&kvm->arch.crypto.crycb->apcb0, 0,
2632                sizeof(kvm->arch.crypto.crycb->apcb0));
2633         memset(&kvm->arch.crypto.crycb->apcb1, 0,
2634                sizeof(kvm->arch.crypto.crycb->apcb1));
2635
2636         VM_EVENT(kvm, 3, "%s", "CLR CRYCB:");
2637         /* recreate the shadow crycb for each vcpu */
2638         kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
2639         kvm_s390_vcpu_unblock_all(kvm);
2640 }
2641 EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
2642
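/*
 * Derive the default guest CPU id from the host CPU id, with the version
 * field forced to 0xff.
 */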
2643 static u64 kvm_s390_get_initial_cpuid(void)
2644 {
2645         struct cpuid cpuid;
2646
2647         get_cpu_id(&cpuid);
2648         cpuid.version = 0xff;
2649         return *((u64 *) &cpuid);
2650 }
2651
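/*
 * Set up the guest crypto control block. If the MSAX3 facility (STFLE
 * bit 76) is available, the AES and DEA protected-key functions are
 * enabled by default and fresh random wrapping key masks are generated.
 */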
2652 static void kvm_s390_crypto_init(struct kvm *kvm)
2653 {
2654         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
2655         kvm_s390_set_crycb_format(kvm);
2656         init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
2657
2658         if (!test_kvm_facility(kvm, 76))
2659                 return;
2660
2661         /* Enable AES/DEA protected key functions by default */
2662         kvm->arch.crypto.aes_kw = 1;
2663         kvm->arch.crypto.dea_kw = 1;
2664         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
2665                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
2666         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
2667                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
2668 }
2669
2670 static void sca_dispose(struct kvm *kvm)
2671 {
2672         if (kvm->arch.use_esca)
2673                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
2674         else
2675                 free_page((unsigned long)(kvm->arch.sca));
2676         kvm->arch.sca = NULL;
2677 }
2678
2679 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
2680 {
2681         gfp_t alloc_flags = GFP_KERNEL_ACCOUNT;
2682         int i, rc;
2683         char debug_name[16];
2684         static unsigned long sca_offset;
2685
2686         rc = -EINVAL;
2687 #ifdef CONFIG_KVM_S390_UCONTROL
2688         if (type & ~KVM_VM_S390_UCONTROL)
2689                 goto out_err;
2690         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
2691                 goto out_err;
2692 #else
2693         if (type)
2694                 goto out_err;
2695 #endif
2696
2697         rc = s390_enable_sie();
2698         if (rc)
2699                 goto out_err;
2700
2701         rc = -ENOMEM;
2702
2703         if (!sclp.has_64bscao)
2704                 alloc_flags |= GFP_DMA;
2705         rwlock_init(&kvm->arch.sca_lock);
2706         /* start with basic SCA */
2707         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
2708         if (!kvm->arch.sca)
2709                 goto out_err;
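             /*
              * Stagger each VM's SCA inside its page in 16-byte steps (the
              * static offset is shared by all VMs and serialized by
              * kvm_lock); this likely spreads the SCA entries accessed by
              * the hardware across cache lines. The offset wraps around
              * before the block would cross the page boundary.
              */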
2710         mutex_lock(&kvm_lock);
2711         sca_offset += 16;
2712         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
2713                 sca_offset = 0;
2714         kvm->arch.sca = (struct bsca_block *)
2715                         ((char *) kvm->arch.sca + sca_offset);
2716         mutex_unlock(&kvm_lock);
2717
2718         sprintf(debug_name, "kvm-%u", current->pid);
2719
2720         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
2721         if (!kvm->arch.dbf)
2722                 goto out_err;
2723
2724         BUILD_BUG_ON(sizeof(struct sie_page2) != 4096);
2725         kvm->arch.sie_page2 =
2726              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL_ACCOUNT | GFP_DMA);
2727         if (!kvm->arch.sie_page2)
2728                 goto out_err;
2729
2730         kvm->arch.sie_page2->kvm = kvm;
2731         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
2732
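             /*
              * fac_mask is the superset userspace may enable (the base plus
              * the extended facilities known to KVM), while fac_list starts
              * out with only the base facilities offered by default; both
              * are limited to what the host itself reports via stfle.
              */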
2733         for (i = 0; i < kvm_s390_fac_size(); i++) {
2734                 kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
2735                                               (kvm_s390_fac_base[i] |
2736                                                kvm_s390_fac_ext[i]);
2737                 kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
2738                                               kvm_s390_fac_base[i];
2739         }
2740         kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
2741
2742         /* we are always in czam mode - even on pre z14 machines */
2743         set_kvm_facility(kvm->arch.model.fac_mask, 138);
2744         set_kvm_facility(kvm->arch.model.fac_list, 138);
2745         /* we emulate STHYI in kvm */
2746         set_kvm_facility(kvm->arch.model.fac_mask, 74);
2747         set_kvm_facility(kvm->arch.model.fac_list, 74);
2748         if (MACHINE_HAS_TLB_GUEST) {
2749                 set_kvm_facility(kvm->arch.model.fac_mask, 147);
2750                 set_kvm_facility(kvm->arch.model.fac_list, 147);
2751         }
2752
2753         if (css_general_characteristics.aiv && test_facility(65))
2754                 set_kvm_facility(kvm->arch.model.fac_mask, 65);
2755
2756         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
2757         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
2758
2759         kvm_s390_crypto_init(kvm);
2760
2761         mutex_init(&kvm->arch.float_int.ais_lock);
2762         spin_lock_init(&kvm->arch.float_int.lock);
2763         for (i = 0; i < FIRQ_LIST_COUNT; i++)
2764                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
2765         init_waitqueue_head(&kvm->arch.ipte_wq);
2766         mutex_init(&kvm->arch.ipte_mutex);
2767
2768         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
2769         VM_EVENT(kvm, 3, "vm created with type %lu", type);
2770
2771         if (type & KVM_VM_S390_UCONTROL) {
2772                 kvm->arch.gmap = NULL;
2773                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
2774         } else {
2775                 if (sclp.hamax == U64_MAX)
2776                         kvm->arch.mem_limit = TASK_SIZE_MAX;
2777                 else
2778                         kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
2779                                                     sclp.hamax + 1);
2780                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
2781                 if (!kvm->arch.gmap)
2782                         goto out_err;
2783                 kvm->arch.gmap->private = kvm;
2784                 kvm->arch.gmap->pfault_enabled = 0;
2785         }
2786
2787         kvm->arch.use_pfmfi = sclp.has_pfmfi;
2788         kvm->arch.use_skf = sclp.has_skey;
2789         spin_lock_init(&kvm->arch.start_stop_lock);
2790         kvm_s390_vsie_init(kvm);
2791         if (use_gisa)
2792                 kvm_s390_gisa_init(kvm);
2793         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
2794
2795         return 0;
2796 out_err:
2797         free_page((unsigned long)kvm->arch.sie_page2);
2798         debug_unregister(kvm->arch.dbf);
2799         sca_dispose(kvm);
2800         KVM_EVENT(3, "creation of vm failed: %d", rc);
2801         return rc;
2802 }
2803
2804 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2805 {
2806         u16 rc, rrc;
2807
2808         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2809         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2810         kvm_s390_clear_local_irqs(vcpu);
2811         kvm_clear_async_pf_completion_queue(vcpu);
2812         if (!kvm_is_ucontrol(vcpu->kvm))
2813                 sca_del_vcpu(vcpu);
2814
2815         if (kvm_is_ucontrol(vcpu->kvm))
2816                 gmap_remove(vcpu->arch.gmap);
2817
2818         if (vcpu->kvm->arch.use_cmma)
2819                 kvm_s390_vcpu_unsetup_cmma(vcpu);
2820         /* We cannot hold the vcpu mutex here; we are already dying */
2821         if (kvm_s390_pv_cpu_get_handle(vcpu))
2822                 kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
2823         free_page((unsigned long)(vcpu->arch.sie_block));
2824 }
2825
2826 static void kvm_free_vcpus(struct kvm *kvm)
2827 {
2828         unsigned int i;
2829         struct kvm_vcpu *vcpu;
2830
2831         kvm_for_each_vcpu(i, vcpu, kvm)
2832                 kvm_vcpu_destroy(vcpu);
2833
2834         mutex_lock(&kvm->lock);
2835         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2836                 kvm->vcpus[i] = NULL;
2837
2838         atomic_set(&kvm->online_vcpus, 0);
2839         mutex_unlock(&kvm->lock);
2840 }
2841
2842 void kvm_arch_destroy_vm(struct kvm *kvm)
2843 {
2844         u16 rc, rrc;
2845
2846         kvm_free_vcpus(kvm);
2847         sca_dispose(kvm);
2848         kvm_s390_gisa_destroy(kvm);
2849         /*
2850          * We are already at the end of life and kvm->lock is not taken.
2851          * This is ok as the file descriptor is closed by now and nobody
2852          * can mess with the pv state. To avoid lockdep_assert_held from
2853          * complaining we do not use kvm_s390_pv_is_protected.
2854          */
2855         if (kvm_s390_pv_get_handle(kvm))
2856                 kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
2857         debug_unregister(kvm->arch.dbf);
2858         free_page((unsigned long)kvm->arch.sie_page2);
2859         if (!kvm_is_ucontrol(kvm))
2860                 gmap_remove(kvm->arch.gmap);
2861         kvm_s390_destroy_adapters(kvm);
2862         kvm_s390_clear_float_irqs(kvm);
2863         kvm_s390_vsie_destroy(kvm);
2864         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2865 }
2866
2867 /* Section: vcpu related */
2868 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2869 {
2870         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2871         if (!vcpu->arch.gmap)
2872                 return -ENOMEM;
2873         vcpu->arch.gmap->private = vcpu->kvm;
2874
2875         return 0;
2876 }
2877
2878 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2879 {
2880         if (!kvm_s390_use_sca_entries())
2881                 return;
2882         read_lock(&vcpu->kvm->arch.sca_lock);
2883         if (vcpu->kvm->arch.use_esca) {
2884                 struct esca_block *sca = vcpu->kvm->arch.sca;
2885
2886                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2887                 sca->cpu[vcpu->vcpu_id].sda = 0;
2888         } else {
2889                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2890
2891                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2892                 sca->cpu[vcpu->vcpu_id].sda = 0;
2893         }
2894         read_unlock(&vcpu->kvm->arch.sca_lock);
2895 }
2896
2897 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2898 {
2899         if (!kvm_s390_use_sca_entries()) {
2900                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2901
2902                 /* we still need the basic sca for the ipte control */
2903                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2904                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2905                 return;
2906         }
2907         read_lock(&vcpu->kvm->arch.sca_lock);
2908         if (vcpu->kvm->arch.use_esca) {
2909                 struct esca_block *sca = vcpu->kvm->arch.sca;
2910
2911                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2912                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2913                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2914                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2915                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2916         } else {
2917                 struct bsca_block *sca = vcpu->kvm->arch.sca;
2918
2919                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2920                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2921                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2922                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2923         }
2924         read_unlock(&vcpu->kvm->arch.sca_lock);
2925 }
2926
2927 /* Basic SCA to Extended SCA data copy routines */
2928 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2929 {
2930         d->sda = s->sda;
2931         d->sigp_ctrl.c = s->sigp_ctrl.c;
2932         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2933 }
2934
2935 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2936 {
2937         int i;
2938
2939         d->ipte_control = s->ipte_control;
2940         d->mcn[0] = s->mcn;
2941         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2942                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2943 }
2944
2945 static int sca_switch_to_extended(struct kvm *kvm)
2946 {
2947         struct bsca_block *old_sca = kvm->arch.sca;
2948         struct esca_block *new_sca;
2949         struct kvm_vcpu *vcpu;
2950         unsigned int vcpu_idx;
2951         u32 scaol, scaoh;
2952
2953         if (kvm->arch.use_esca)
2954                 return 0;
2955
2956         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL_ACCOUNT | __GFP_ZERO);
2957         if (!new_sca)
2958                 return -ENOMEM;
2959
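             /*
              * The switch must be invisible to running VCPUs: block them
              * all, copy the entries under the sca_lock write lock and
              * point every SIE block at the new ESCA before unblocking.
              * scaoh/scaol are the high and low halves of the new origin;
              * the low six bits carry no address information since the
              * ESCA is at least 64-byte aligned.
              */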
2960         scaoh = (u32)((u64)(new_sca) >> 32);
2961         scaol = (u32)(u64)(new_sca) & ~0x3fU;
2962
2963         kvm_s390_vcpu_block_all(kvm);
2964         write_lock(&kvm->arch.sca_lock);
2965
2966         sca_copy_b_to_e(new_sca, old_sca);
2967
2968         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2969                 vcpu->arch.sie_block->scaoh = scaoh;
2970                 vcpu->arch.sie_block->scaol = scaol;
2971                 vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2972         }
2973         kvm->arch.sca = new_sca;
2974         kvm->arch.use_esca = 1;
2975
2976         write_unlock(&kvm->arch.sca_lock);
2977         kvm_s390_vcpu_unblock_all(kvm);
2978
2979         free_page((unsigned long)old_sca);
2980
2981         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2982                  old_sca, kvm->arch.sca);
2983         return 0;
2984 }
2985
2986 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2987 {
2988         int rc;
2989
2990         if (!kvm_s390_use_sca_entries()) {
2991                 return id < KVM_MAX_VCPUS;
2994         }
2995         if (id < KVM_S390_BSCA_CPU_SLOTS)
2996                 return true;
2997         if (!sclp.has_esca || !sclp.has_64bscao)
2998                 return false;
2999
3000         mutex_lock(&kvm->lock);
3001         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
3002         mutex_unlock(&kvm->lock);
3003
3004         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
3005 }
3006
3007 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3008 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3009 {
3010         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
3011         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3012         vcpu->arch.cputm_start = get_tod_clock_fast();
3013         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3014 }
3015
3016 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3017 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3018 {
3019         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
3020         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3021         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3022         vcpu->arch.cputm_start = 0;
3023         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3024 }
3025
3026 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3027 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3028 {
3029         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
3030         vcpu->arch.cputm_enabled = true;
3031         __start_cpu_timer_accounting(vcpu);
3032 }
3033
3034 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
3035 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3036 {
3037         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
3038         __stop_cpu_timer_accounting(vcpu);
3039         vcpu->arch.cputm_enabled = false;
3040 }
3041
3042 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3043 {
3044         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3045         __enable_cpu_timer_accounting(vcpu);
3046         preempt_enable();
3047 }
3048
3049 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
3050 {
3051         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3052         __disable_cpu_timer_accounting(vcpu);
3053         preempt_enable();
3054 }
3055
3056 /* set the cpu timer - may only be called from the VCPU thread itself */
3057 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
3058 {
3059         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
3060         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
3061         if (vcpu->arch.cputm_enabled)
3062                 vcpu->arch.cputm_start = get_tod_clock_fast();
3063         vcpu->arch.sie_block->cputm = cputm;
3064         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
3065         preempt_enable();
3066 }
3067
3068 /* update and get the cpu timer - can also be called from other VCPU threads */
3069 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
3070 {
3071         unsigned int seq;
3072         __u64 value;
3073
3074         if (unlikely(!vcpu->arch.cputm_enabled))
3075                 return vcpu->arch.sie_block->cputm;
3076
3077         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
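             /*
              * raw_read_seqcount() may return an odd value while a writer
              * is active; retrying with (seq & ~1) then forces another
              * pass instead of spinning inside the read section (see the
              * deadlock note below).
              */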
3078         do {
3079                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
3080                 /*
3081                  * If the writer would ever execute a read in the critical
3082                  * section, e.g. in irq context, we have a deadlock.
3083                  */
3084                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
3085                 value = vcpu->arch.sie_block->cputm;
3086                 /* if cputm_start is 0, accounting is being started/stopped */
3087                 if (likely(vcpu->arch.cputm_start))
3088                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
3089         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
3090         preempt_enable();
3091         return value;
3092 }
3093
3094 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
3095 {
3097         gmap_enable(vcpu->arch.enabled_gmap);
3098         kvm_s390_set_cpuflags(vcpu, CPUSTAT_RUNNING);
3099         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3100                 __start_cpu_timer_accounting(vcpu);
3101         vcpu->cpu = cpu;
3102 }
3103
3104 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
3105 {
3106         vcpu->cpu = -1;
3107         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
3108                 __stop_cpu_timer_accounting(vcpu);
3109         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_RUNNING);
3110         vcpu->arch.enabled_gmap = gmap_get_enabled();
3111         gmap_disable(vcpu->arch.enabled_gmap);
3113 }
3114
3115 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
3116 {
3117         mutex_lock(&vcpu->kvm->lock);
3118         preempt_disable();
3119         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
3120         vcpu->arch.sie_block->epdx = vcpu->kvm->arch.epdx;
3121         preempt_enable();
3122         mutex_unlock(&vcpu->kvm->lock);
3123         if (!kvm_is_ucontrol(vcpu->kvm)) {
3124                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
3125                 sca_add_vcpu(vcpu);
3126         }
3127         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
3128                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3129         /* make vcpu_load load the right gmap on the first trigger */
3130         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
3131 }
3132
3133 static bool kvm_has_pckmo_subfunc(struct kvm *kvm, unsigned long nr)
3134 {
3135         return test_bit_inv(nr, (unsigned long *)&kvm->arch.model.subfuncs.pckmo) &&
3136                test_bit_inv(nr, (unsigned long *)&kvm_s390_available_subfunc.pckmo);
3139 }
3140
3141 static bool kvm_has_pckmo_ecc(struct kvm *kvm)
3142 {
3143         /* At least one ECC subfunction must be present */
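             /*
              * Codes 32-34 should be the PCKMO encrypt-ECC-key functions
              * for P256/P384/P521, codes 40 and 41 the Ed25519/Ed448
              * variants.
              */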
3144         return kvm_has_pckmo_subfunc(kvm, 32) ||
3145                kvm_has_pckmo_subfunc(kvm, 33) ||
3146                kvm_has_pckmo_subfunc(kvm, 34) ||
3147                kvm_has_pckmo_subfunc(kvm, 40) ||
3148                kvm_has_pckmo_subfunc(kvm, 41);
3150 }
3151
3152 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
3153 {
3154         /*
3155          * If the AP instructions are not being interpreted and the MSAX3
3156          * facility is not configured for the guest, there is nothing to set up.
3157          */
3158         if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76))
3159                 return;
3160
3161         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
3162         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
3163         vcpu->arch.sie_block->eca &= ~ECA_APIE;
3164         vcpu->arch.sie_block->ecd &= ~ECD_ECC;
3165
3166         if (vcpu->kvm->arch.crypto.apie)
3167                 vcpu->arch.sie_block->eca |= ECA_APIE;
3168
3169         /* Set up protected key support */
3170         if (vcpu->kvm->arch.crypto.aes_kw) {
3171                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
3172                 /* ecc is also wrapped with AES key */
3173                 if (kvm_has_pckmo_ecc(vcpu->kvm))
3174                         vcpu->arch.sie_block->ecd |= ECD_ECC;
3175         }
3176
3177         if (vcpu->kvm->arch.crypto.dea_kw)
3178                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
3179 }
3180
3181 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
3182 {
3183         free_page(vcpu->arch.sie_block->cbrlo);
3184         vcpu->arch.sie_block->cbrlo = 0;
3185 }
3186
3187 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
3188 {
3189         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
3190         if (!vcpu->arch.sie_block->cbrlo)
3191                 return -ENOMEM;
3192         return 0;
3193 }
3194
3195 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
3196 {
3197         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
3198
3199         vcpu->arch.sie_block->ibc = model->ibc;
3200         if (test_kvm_facility(vcpu->kvm, 7))
3201                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
3202 }
3203
3204 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
3205 {
3206         int rc = 0;
3207         u16 uvrc, uvrrc;
3208
3209         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
3210                                                     CPUSTAT_SM |
3211                                                     CPUSTAT_STOPPED);
3212
3213         if (test_kvm_facility(vcpu->kvm, 78))
3214                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2);
3215         else if (test_kvm_facility(vcpu->kvm, 8))
3216                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
3217
3218         kvm_s390_vcpu_setup_model(vcpu);
3219
3220         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
3221         if (MACHINE_HAS_ESOP)
3222                 vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
3223         if (test_kvm_facility(vcpu->kvm, 9))
3224                 vcpu->arch.sie_block->ecb |= ECB_SRSI;
3225         if (test_kvm_facility(vcpu->kvm, 73))
3226                 vcpu->arch.sie_block->ecb |= ECB_TE;
3227
3228         if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
3229                 vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
3230         if (test_kvm_facility(vcpu->kvm, 130))
3231                 vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
3232         vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
3233         if (sclp.has_cei)
3234                 vcpu->arch.sie_block->eca |= ECA_CEI;
3235         if (sclp.has_ib)
3236                 vcpu->arch.sie_block->eca |= ECA_IB;
3237         if (sclp.has_siif)
3238                 vcpu->arch.sie_block->eca |= ECA_SII;
3239         if (sclp.has_sigpif)
3240                 vcpu->arch.sie_block->eca |= ECA_SIGPI;
3241         if (test_kvm_facility(vcpu->kvm, 129)) {
3242                 vcpu->arch.sie_block->eca |= ECA_VX;
3243                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3244         }
3245         if (test_kvm_facility(vcpu->kvm, 139))
3246                 vcpu->arch.sie_block->ecd |= ECD_MEF;
3247         if (test_kvm_facility(vcpu->kvm, 156))
3248                 vcpu->arch.sie_block->ecd |= ECD_ETOKENF;
3249         if (vcpu->arch.sie_block->gd) {
3250                 vcpu->arch.sie_block->eca |= ECA_AIV;
3251                 VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
3252                            vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
3253         }
3254         vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
3255                                         | SDNXC;
3256         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
3257
3258         if (sclp.has_kss)
3259                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
3260         else
3261                 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
3262
3263         if (vcpu->kvm->arch.use_cmma) {
3264                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
3265                 if (rc)
3266                         return rc;
3267         }
3268         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
3269         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
3270
3271         vcpu->arch.sie_block->hpid = HPID_KVM;
3272
3273         kvm_s390_vcpu_crypto_setup(vcpu);
3274
3275         mutex_lock(&vcpu->kvm->lock);
3276         if (kvm_s390_pv_is_protected(vcpu->kvm)) {
3277                 rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
3278                 if (rc)
3279                         kvm_s390_vcpu_unsetup_cmma(vcpu);
3280         }
3281         mutex_unlock(&vcpu->kvm->lock);
3282
3283         return rc;
3284 }
3285
3286 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
3287 {
3288         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
3289                 return -EINVAL;
3290         return 0;
3291 }
3292
3293 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
3294 {
3295         struct sie_page *sie_page;
3296         int rc;
3297
3298         BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
3299         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL_ACCOUNT);
3300         if (!sie_page)
3301                 return -ENOMEM;
3302
3303         vcpu->arch.sie_block = &sie_page->sie_block;
3304         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
3305
3306         /* the real guest size will always be smaller than msl */
3307         vcpu->arch.sie_block->mso = 0;
3308         vcpu->arch.sie_block->msl = sclp.hamax;
3309
3310         vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
3311         spin_lock_init(&vcpu->arch.local_int.lock);
3312         vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
3313         if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
3314                 vcpu->arch.sie_block->gd |= GISA_FORMAT1;
3315         seqcount_init(&vcpu->arch.cputm_seqcount);
3316
3317         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3318         kvm_clear_async_pf_completion_queue(vcpu);
3319         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
3320                                     KVM_SYNC_GPRS |
3321                                     KVM_SYNC_ACRS |
3322                                     KVM_SYNC_CRS |
3323                                     KVM_SYNC_ARCH0 |
3324                                     KVM_SYNC_PFAULT |
3325                                     KVM_SYNC_DIAG318;
3326         kvm_s390_set_prefix(vcpu, 0);
3327         if (test_kvm_facility(vcpu->kvm, 64))
3328                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
3329         if (test_kvm_facility(vcpu->kvm, 82))
3330                 vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
3331         if (test_kvm_facility(vcpu->kvm, 133))
3332                 vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
3333         if (test_kvm_facility(vcpu->kvm, 156))
3334                 vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
3335         /* fprs can be synchronized via vrs, even if the guest has no vx. With
3336          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
3337          */
3338         if (MACHINE_HAS_VX)
3339                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
3340         else
3341                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
3342
3343         if (kvm_is_ucontrol(vcpu->kvm)) {
3344                 rc = __kvm_ucontrol_vcpu_init(vcpu);
3345                 if (rc)
3346                         goto out_free_sie_block;
3347         }
3348
3349         VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
3350                  vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3351         trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
3352
3353         rc = kvm_s390_vcpu_setup(vcpu);
3354         if (rc)
3355                 goto out_ucontrol_uninit;
3356         return 0;
3357
3358 out_ucontrol_uninit:
3359         if (kvm_is_ucontrol(vcpu->kvm))
3360                 gmap_remove(vcpu->arch.gmap);
3361 out_free_sie_block:
3362         free_page((unsigned long)(vcpu->arch.sie_block));
3363         return rc;
3364 }
3365
3366 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
3367 {
3368         return kvm_s390_vcpu_has_irq(vcpu, 0);
3369 }
3370
3371 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
3372 {
3373         return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
3374 }
3375
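     /*
      * Setting PROG_BLOCK_SIE keeps the low-level SIE entry code from
      * (re)entering the guest for this VCPU; exit_sie() then kicks the CPU
      * out of SIE in case it is currently running.
      */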
3376 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
3377 {
3378         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3379         exit_sie(vcpu);
3380 }
3381
3382 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
3383 {
3384         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
3385 }
3386
3387 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
3388 {
3389         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3390         exit_sie(vcpu);
3391 }
3392
3393 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu)
3394 {
3395         return atomic_read(&vcpu->arch.sie_block->prog20) &
3396                (PROG_BLOCK_SIE | PROG_REQUEST);
3397 }
3398
3399 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
3400 {
3401         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
3402 }
3403
3404 /*
3405  * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
3406  * If the CPU is not running (e.g. waiting as idle) the function will
3407  * return immediately.
      */
3408 void exit_sie(struct kvm_vcpu *vcpu)
3409 {
3410         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
3411         kvm_s390_vsie_kick(vcpu);
3412         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
3413                 cpu_relax();
3414 }
3415
3416 /* Kick a guest cpu out of SIE to process a request synchronously */
3417 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
3418 {
3419         kvm_make_request(req, vcpu);
3420         kvm_s390_vcpu_request(vcpu);
3421 }
3422
3423 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
3424                               unsigned long end)
3425 {
3426         struct kvm *kvm = gmap->private;
3427         struct kvm_vcpu *vcpu;
3428         unsigned long prefix;
3429         int i;
3430
3431         if (gmap_is_shadow(gmap))
3432                 return;
3433         if (start >= 1UL << 31)
3434                 /* We are only interested in prefix pages */
3435                 return;
3436         kvm_for_each_vcpu(i, vcpu, kvm) {
3437                 /* match against both prefix pages */
3438                 prefix = kvm_s390_get_prefix(vcpu);
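                     /*
                      * The prefix area covers two consecutive pages, so
                      * this is an overlap test of [start, end] against
                      * [prefix, prefix + 2 * PAGE_SIZE - 1].
                      */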
3439                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
3440                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
3441                                    start, end);
3442                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
3443                 }
3444         }
3445 }
3446
3447 bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
3448 {
3449         /* do not poll with more than halt_poll_max_steal percent of steal time */
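             /*
              * avg_steal_timer is the average steal time per timer tick in
              * TOD-clock units; TICK_USEC << 12 is the tick length
              * converted from microseconds to the same units (1
              * microsecond = 4096 TOD units), so the quotient is the steal
              * share of a tick in percent.
              */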
3450         if (S390_lowcore.avg_steal_timer * 100 / (TICK_USEC << 12) >=
3451             halt_poll_max_steal) {
3452                 vcpu->stat.halt_no_poll_steal++;
3453                 return true;
3454         }
3455         return false;
3456 }
3457
3458 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
3459 {
3460         /* kvm common code refers to this, but never calls it */
3461         BUG();
3462         return 0;
3463 }
3464
3465 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
3466                                            struct kvm_one_reg *reg)
3467 {
3468         int r = -EINVAL;
3469
3470         switch (reg->id) {
3471         case KVM_REG_S390_TODPR:
3472                 r = put_user(vcpu->arch.sie_block->todpr,
3473                              (u32 __user *)reg->addr);
3474                 break;
3475         case KVM_REG_S390_EPOCHDIFF:
3476                 r = put_user(vcpu->arch.sie_block->epoch,
3477                              (u64 __user *)reg->addr);
3478                 break;
3479         case KVM_REG_S390_CPU_TIMER:
3480                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
3481                              (u64 __user *)reg->addr);
3482                 break;
3483         case KVM_REG_S390_CLOCK_COMP:
3484                 r = put_user(vcpu->arch.sie_block->ckc,
3485                              (u64 __user *)reg->addr);
3486                 break;
3487         case KVM_REG_S390_PFTOKEN:
3488                 r = put_user(vcpu->arch.pfault_token,
3489                              (u64 __user *)reg->addr);
3490                 break;
3491         case KVM_REG_S390_PFCOMPARE:
3492                 r = put_user(vcpu->arch.pfault_compare,
3493                              (u64 __user *)reg->addr);
3494                 break;
3495         case KVM_REG_S390_PFSELECT:
3496                 r = put_user(vcpu->arch.pfault_select,
3497                              (u64 __user *)reg->addr);
3498                 break;
3499         case KVM_REG_S390_PP:
3500                 r = put_user(vcpu->arch.sie_block->pp,
3501                              (u64 __user *)reg->addr);
3502                 break;
3503         case KVM_REG_S390_GBEA:
3504                 r = put_user(vcpu->arch.sie_block->gbea,
3505                              (u64 __user *)reg->addr);
3506                 break;
3507         default:
3508                 break;
3509         }
3510
3511         return r;
3512 }
3513
3514 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
3515                                            struct kvm_one_reg *reg)
3516 {
3517         int r = -EINVAL;
3518         __u64 val;
3519
3520         switch (reg->id) {
3521         case KVM_REG_S390_TODPR:
3522                 r = get_user(vcpu->arch.sie_block->todpr,
3523                              (u32 __user *)reg->addr);
3524                 break;
3525         case KVM_REG_S390_EPOCHDIFF:
3526                 r = get_user(vcpu->arch.sie_block->epoch,
3527                              (u64 __user *)reg->addr);
3528                 break;
3529         case KVM_REG_S390_CPU_TIMER:
3530                 r = get_user(val, (u64 __user *)reg->addr);
3531                 if (!r)
3532                         kvm_s390_set_cpu_timer(vcpu, val);
3533                 break;
3534         case KVM_REG_S390_CLOCK_COMP:
3535                 r = get_user(vcpu->arch.sie_block->ckc,
3536                              (u64 __user *)reg->addr);
3537                 break;
3538         case KVM_REG_S390_PFTOKEN:
3539                 r = get_user(vcpu->arch.pfault_token,
3540                              (u64 __user *)reg->addr);
3541                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3542                         kvm_clear_async_pf_completion_queue(vcpu);
3543                 break;
3544         case KVM_REG_S390_PFCOMPARE:
3545                 r = get_user(vcpu->arch.pfault_compare,
3546                              (u64 __user *)reg->addr);
3547                 break;
3548         case KVM_REG_S390_PFSELECT:
3549                 r = get_user(vcpu->arch.pfault_select,
3550                              (u64 __user *)reg->addr);
3551                 break;
3552         case KVM_REG_S390_PP:
3553                 r = get_user(vcpu->arch.sie_block->pp,
3554                              (u64 __user *)reg->addr);
3555                 break;
3556         case KVM_REG_S390_GBEA:
3557                 r = get_user(vcpu->arch.sie_block->gbea,
3558                              (u64 __user *)reg->addr);
3559                 break;
3560         default:
3561                 break;
3562         }
3563
3564         return r;
3565 }
3566
3567 static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
3568 {
3569         vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
3570         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
3571         memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
3572
3573         kvm_clear_async_pf_completion_queue(vcpu);
3574         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
3575                 kvm_s390_vcpu_stop(vcpu);
3576         kvm_s390_clear_local_irqs(vcpu);
3577 }
3578
3579 static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
3580 {
3581         /* Initial reset is a superset of the normal reset */
3582         kvm_arch_vcpu_ioctl_normal_reset(vcpu);
3583
3584         /*
3585          * This equals the initial CPU reset in the PoP, but we don't
3586          * switch to ESA. We reset not only the internal data, but also ...
3587          */
3588         vcpu->arch.sie_block->gpsw.mask = 0;
3589         vcpu->arch.sie_block->gpsw.addr = 0;
3590         kvm_s390_set_prefix(vcpu, 0);
3591         kvm_s390_set_cpu_timer(vcpu, 0);
3592         vcpu->arch.sie_block->ckc = 0;
3593         memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
3594         vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
3595         vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
3596
3597         /* ... the data in sync regs */
3598         memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
3600         vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
3601         vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
3602         vcpu->run->psw_addr = 0;
3603         vcpu->run->psw_mask = 0;
3604         vcpu->run->s.regs.todpr = 0;
3605         vcpu->run->s.regs.cputm = 0;
3606         vcpu->run->s.regs.ckc = 0;
3607         vcpu->run->s.regs.pp = 0;
3608         vcpu->run->s.regs.gbea = 1;
3609         vcpu->run->s.regs.fpc = 0;
3610         /*
3611          * Do not reset these registers in the protected case, as some of
3612          * them are overlaid and they are not accessible in this case
3613          * anyway.
3614          */
3615         if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3616                 vcpu->arch.sie_block->gbea = 1;
3617                 vcpu->arch.sie_block->pp = 0;
3618                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
3619                 vcpu->arch.sie_block->todpr = 0;
3620         }
3621 }
3622
3623 static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
3624 {
3625         struct kvm_sync_regs *regs = &vcpu->run->s.regs;
3626
3627         /* Clear reset is a superset of the initial reset */
3628         kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3629
3630         memset(&regs->gprs, 0, sizeof(regs->gprs));
3631         memset(&regs->vrs, 0, sizeof(regs->vrs));
3632         memset(&regs->acrs, 0, sizeof(regs->acrs));
3633         memset(&regs->gscb, 0, sizeof(regs->gscb));
3634
3635         regs->etoken = 0;
3636         regs->etoken_extension = 0;
3637 }
3638
3639 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3640 {
3641         vcpu_load(vcpu);
3642         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
3643         vcpu_put(vcpu);
3644         return 0;
3645 }
3646
3647 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3648 {
3649         vcpu_load(vcpu);
3650         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
3651         vcpu_put(vcpu);
3652         return 0;
3653 }
3654
3655 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3656                                   struct kvm_sregs *sregs)
3657 {
3658         vcpu_load(vcpu);
3659
3660         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
3661         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
3662
3663         vcpu_put(vcpu);
3664         return 0;
3665 }
3666
3667 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3668                                   struct kvm_sregs *sregs)
3669 {
3670         vcpu_load(vcpu);
3671
3672         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
3673         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
3674
3675         vcpu_put(vcpu);
3676         return 0;
3677 }
3678
3679 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3680 {
3681         int ret = 0;
3682
3683         vcpu_load(vcpu);
3684
3685         if (test_fp_ctl(fpu->fpc)) {
3686                 ret = -EINVAL;
3687                 goto out;
3688         }
3689         vcpu->run->s.regs.fpc = fpu->fpc;
3690         if (MACHINE_HAS_VX)
3691                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
3692                                  (freg_t *) fpu->fprs);
3693         else
3694                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
3695
3696 out:
3697         vcpu_put(vcpu);
3698         return ret;
3699 }
3700
3701 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3702 {
3703         vcpu_load(vcpu);
3704
3705         /* make sure we have the latest values */
3706         save_fpu_regs();
3707         if (MACHINE_HAS_VX)
3708                 convert_vx_to_fp((freg_t *) fpu->fprs,
3709                                  (__vector128 *) vcpu->run->s.regs.vrs);
3710         else
3711                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
3712         fpu->fpc = vcpu->run->s.regs.fpc;
3713
3714         vcpu_put(vcpu);
3715         return 0;
3716 }
3717
3718 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
3719 {
3720         int rc = 0;
3721
3722         if (!is_vcpu_stopped(vcpu))
3723                 rc = -EBUSY;
3724         else {
3725                 vcpu->run->psw_mask = psw.mask;
3726                 vcpu->run->psw_addr = psw.addr;
3727         }
3728         return rc;
3729 }
3730
3731 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3732                                   struct kvm_translation *tr)
3733 {
3734         return -EINVAL; /* not implemented yet */
3735 }
3736
3737 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
3738                               KVM_GUESTDBG_USE_HW_BP | \
3739                               KVM_GUESTDBG_ENABLE)
3740
3741 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
3742                                         struct kvm_guest_debug *dbg)
3743 {
3744         int rc = 0;
3745
3746         vcpu_load(vcpu);
3747
3748         vcpu->guest_debug = 0;
3749         kvm_s390_clear_bp_data(vcpu);
3750
3751         if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
3752                 rc = -EINVAL;
3753                 goto out;
3754         }
3755         if (!sclp.has_gpere) {
3756                 rc = -EINVAL;
3757                 goto out;
3758         }
3759
3760         if (dbg->control & KVM_GUESTDBG_ENABLE) {
3761                 vcpu->guest_debug = dbg->control;
3762                 /* enforce guest PER */
3763                 kvm_s390_set_cpuflags(vcpu, CPUSTAT_P);
3764
3765                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
3766                         rc = kvm_s390_import_bp_data(vcpu, dbg);
3767         } else {
3768                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3769                 vcpu->arch.guestdbg.last_bp = 0;
3770         }
3771
3772         if (rc) {
3773                 vcpu->guest_debug = 0;
3774                 kvm_s390_clear_bp_data(vcpu);
3775                 kvm_s390_clear_cpuflags(vcpu, CPUSTAT_P);
3776         }
3777
3778 out:
3779         vcpu_put(vcpu);
3780         return rc;
3781 }
3782
3783 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3784                                     struct kvm_mp_state *mp_state)
3785 {
3786         int ret;
3787
3788         vcpu_load(vcpu);
3789
3790         /* CHECK_STOP and LOAD are not supported yet */
3791         ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
3792                                       KVM_MP_STATE_OPERATING;
3793
3794         vcpu_put(vcpu);
3795         return ret;
3796 }
3797
3798 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3799                                     struct kvm_mp_state *mp_state)
3800 {
3801         int rc = 0;
3802
3803         vcpu_load(vcpu);
3804
3805         /* user space knows about this interface - let it control the state */
3806         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
3807
3808         switch (mp_state->mp_state) {
3809         case KVM_MP_STATE_STOPPED:
3810                 rc = kvm_s390_vcpu_stop(vcpu);
3811                 break;
3812         case KVM_MP_STATE_OPERATING:
3813                 rc = kvm_s390_vcpu_start(vcpu);
3814                 break;
3815         case KVM_MP_STATE_LOAD:
3816                 if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
3817                         rc = -ENXIO;
3818                         break;
3819                 }
3820                 rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
3821                 break;
3822         case KVM_MP_STATE_CHECK_STOP:
3823                 fallthrough;    /* CHECK_STOP and LOAD are not supported yet */
3824         default:
3825                 rc = -ENXIO;
3826         }
3827
3828         vcpu_put(vcpu);
3829         return rc;
3830 }
3831
3832 static bool ibs_enabled(struct kvm_vcpu *vcpu)
3833 {
3834         return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS);
3835 }
3836
3837 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
3838 {
3839 retry:
3840         kvm_s390_vcpu_request_handled(vcpu);
3841         if (!kvm_request_pending(vcpu))
3842                 return 0;
3843         /*
3844          * We use MMU_RELOAD just to re-arm the ipte notifier for the
3845          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
3846          * This ensures that the ipte instruction for this request has
3847          * already finished. We might race against a second unmapper that
3848          * wants to set the blocking bit. Let's just retry the request loop.
3849          */
3850         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
3851                 int rc;
3852                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
3853                                           kvm_s390_get_prefix(vcpu),
3854                                           PAGE_SIZE * 2, PROT_WRITE);
3855                 if (rc) {
3856                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
3857                         return rc;
3858                 }
3859                 goto retry;
3860         }
3861
3862         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
3863                 vcpu->arch.sie_block->ihcpu = 0xffff;
3864                 goto retry;
3865         }
3866
3867         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
3868                 if (!ibs_enabled(vcpu)) {
3869                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
3870                         kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
3871                 }
3872                 goto retry;
3873         }
3874
3875         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
3876                 if (ibs_enabled(vcpu)) {
3877                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
3878                         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
3879                 }
3880                 goto retry;
3881         }
3882
3883         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
3884                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
3885                 goto retry;
3886         }
3887
3888         if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
3889                 /*
3890                  * Disable CMM virtualization; we will emulate the ESSA
3891                  * instruction manually, in order to provide additional
3892                  * functionalities needed for live migration.
3893                  */
3894                 vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
3895                 goto retry;
3896         }
3897
3898         if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
3899                 /*
3900                  * Re-enable CMM virtualization if CMMA is available and
3901                  * CMM has been used.
3902                  */
3903                 if ((vcpu->kvm->arch.use_cmma) &&
3904                     (vcpu->kvm->mm->context.uses_cmm))
3905                         vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
3906                 goto retry;
3907         }
3908
3909         /* nothing to do, just clear the request */
3910         kvm_clear_request(KVM_REQ_UNHALT, vcpu);
3911         /* we left the vsie handler, nothing to do, just clear the request */
3912         kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
3913
3914         return 0;
3915 }
3916
3917 void kvm_s390_set_tod_clock(struct kvm *kvm,
3918                             const struct kvm_s390_vm_tod_clock *gtod)
3919 {
3920         struct kvm_vcpu *vcpu;
3921         union tod_clock clk;
3922         int i;
3923
3924         mutex_lock(&kvm->lock);
3925         preempt_disable();
3926
3927         store_tod_clock_ext(&clk);
3928
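             /*
              * The epoch is the 64-bit offset added to the host TOD to
              * yield the guest TOD. With the multiple-epoch facility (139)
              * the offset is extended by an epoch index; if the low 64-bit
              * subtraction wrapped around (epoch > gtod->tod), the borrow
              * is propagated into epdx.
              */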
3929         kvm->arch.epoch = gtod->tod - clk.tod;
3930         kvm->arch.epdx = 0;
3931         if (test_kvm_facility(kvm, 139)) {
3932                 kvm->arch.epdx = gtod->epoch_idx - clk.ei;
3933                 if (kvm->arch.epoch > gtod->tod)
3934                         kvm->arch.epdx -= 1;
3935         }
3936
3937         kvm_s390_vcpu_block_all(kvm);
3938         kvm_for_each_vcpu(i, vcpu, kvm) {
3939                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
3940                 vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
3941         }
3942
3943         kvm_s390_vcpu_unblock_all(kvm);
3944         preempt_enable();
3945         mutex_unlock(&kvm->lock);
3946 }
3947
3948 /**
3949  * kvm_arch_fault_in_page - fault-in guest page if necessary
3950  * @vcpu: The corresponding virtual cpu
3951  * @gpa: Guest physical address
3952  * @writable: Whether the page should be writable or not
3953  *
3954  * Make sure that a guest page has been faulted-in on the host.
3955  *
3956  * Return: Zero on success, negative error code otherwise.
3957  */
3958 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
3959 {
3960         return gmap_fault(vcpu->arch.gmap, gpa,
3961                           writable ? FAULT_FLAG_WRITE : 0);
3962 }
3963
3964 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3965                                       unsigned long token)
3966 {
3967         struct kvm_s390_interrupt inti;
3968         struct kvm_s390_irq irq;
3969
3970         if (start_token) {
3971                 irq.u.ext.ext_params2 = token;
3972                 irq.type = KVM_S390_INT_PFAULT_INIT;
3973                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3974         } else {
3975                 inti.type = KVM_S390_INT_PFAULT_DONE;
3976                 inti.parm64 = token;
3977                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3978         }
3979 }
3980
3981 bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3982                                      struct kvm_async_pf *work)
3983 {
3984         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3985         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3986
3987         return true;
3988 }
3989
3990 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3991                                  struct kvm_async_pf *work)
3992 {
3993         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3994         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3995 }
3996
3997 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3998                                struct kvm_async_pf *work)
3999 {
4000         /* s390 will always inject the page directly */
4001 }
4002
4003 bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
4004 {
4005         /*
4006          * s390 will always inject the page directly,
4007          * but we still want check_async_completion to clean up
4008          */
4009         return true;
4010 }
4011
4012 static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
4013 {
4014         hva_t hva;
4015         struct kvm_arch_async_pf arch;
4016
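             /*
              * Only use async page faults if the guest can actually take a
              * pfault-init interrupt: a token has been registered
              * (presumably via the DIAG 0x258 handshake), the PSW matches
              * the select/compare pair, external interrupts and the
              * service-signal subclass are enabled, and no other interrupt
              * is already pending.
              */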
4017         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4018                 return false;
4019         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
4020             vcpu->arch.pfault_compare)
4021                 return false;
4022         if (psw_extint_disabled(vcpu))
4023                 return false;
4024         if (kvm_s390_vcpu_has_irq(vcpu, 0))
4025                 return false;
4026         if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
4027                 return false;
4028         if (!vcpu->arch.gmap->pfault_enabled)
4029                 return false;
4030
4031         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
4032         hva += current->thread.gmap_addr & ~PAGE_MASK;
4033         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
4034                 return false;
4035
4036         return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
4037 }
4038
4039 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
4040 {
4041         int rc, cpuflags;
4042
4043         /*
4044          * On s390 notifications for arriving pages will be delivered directly
4045          * to the guest, but the housekeeping for completed pfaults is
4046          * handled outside the worker.
4047          */
4048         kvm_check_async_pf_completion(vcpu);
4049
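             /*
              * Stash the guest's r14/r15 in the SIE block; the low-level
              * SIE entry code presumably needs those two registers for
              * host linkage, so they are not handed over like gprs 0-13.
              */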
4050         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
4051         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
4052
4053         if (need_resched())
4054                 schedule();
4055
4056         if (!kvm_is_ucontrol(vcpu->kvm)) {
4057                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
4058                 if (rc)
4059                         return rc;
4060         }
4061
4062         rc = kvm_s390_handle_requests(vcpu);
4063         if (rc)
4064                 return rc;
4065
4066         if (guestdbg_enabled(vcpu)) {
4067                 kvm_s390_backup_guest_per_regs(vcpu);
4068                 kvm_s390_patch_guest_per_regs(vcpu);
4069         }
4070
4071         clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
4072
4073         vcpu->arch.sie_block->icptcode = 0;
4074         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
4075         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
4076         trace_kvm_s390_sie_enter(vcpu, cpuflags);
4077
4078         return 0;
4079 }
4080
4081 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
4082 {
4083         struct kvm_s390_pgm_info pgm_info = {
4084                 .code = PGM_ADDRESSING,
4085         };
4086         u8 opcode, ilen;
4087         int rc;
4088
4089         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
4090         trace_kvm_s390_sie_fault(vcpu);
4091
4092         /*
4093          * We want to inject an addressing exception, which is defined as a
4094          * suppressing or terminating exception. However, since we came here
4095          * by a DAT access exception, the PSW still points to the faulting
4096          * instruction since DAT exceptions are nullifying. So we've got
4097          * to look up the current opcode to get the length of the instruction
4098          * to be able to forward the PSW.
4099          */
4100         rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
4101         ilen = insn_length(opcode);
4102         if (rc < 0) {
4103                 return rc;
4104         } else if (rc) {
4105                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
4106                  * Forward by arbitrary ilc, injection will take care of
4107                  * nullification if necessary.
4108                  */
4109                 pgm_info = vcpu->arch.pgm;
4110                 ilen = 4;
4111         }
4112         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
4113         kvm_s390_forward_psw(vcpu, ilen);
4114         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
4115 }
4116
4117 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
4118 {
4119         struct mcck_volatile_info *mcck_info;
4120         struct sie_page *sie_page;
4121
4122         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
4123                    vcpu->arch.sie_block->icptcode);
4124         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
4125
4126         if (guestdbg_enabled(vcpu))
4127                 kvm_s390_restore_guest_per_regs(vcpu);
4128
4129         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
4130         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
4131
4132         if (exit_reason == -EINTR) {
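                     /*
                      * -EINTR should indicate a machine check that hit the
                      * CPU while it was in SIE; the volatile mcck data was
                      * saved to the sie_page by the machine check handler
                      * and is reinjected into the guest here.
                      */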
4133                 VCPU_EVENT(vcpu, 3, "%s", "machine check");
4134                 sie_page = container_of(vcpu->arch.sie_block,
4135                                         struct sie_page, sie_block);
4136                 mcck_info = &sie_page->mcck_info;
4137                 kvm_s390_reinject_machine_check(vcpu, mcck_info);
4138                 return 0;
4139         }
4140
4141         if (vcpu->arch.sie_block->icptcode > 0) {
4142                 int rc = kvm_handle_sie_intercept(vcpu);
4143
4144                 if (rc != -EOPNOTSUPP)
4145                         return rc;
4146                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
4147                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
4148                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
4149                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
4150                 return -EREMOTE;
4151         } else if (exit_reason != -EFAULT) {
4152                 vcpu->stat.exit_null++;
4153                 return 0;
4154         } else if (kvm_is_ucontrol(vcpu->kvm)) {
4155                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
4156                 vcpu->run->s390_ucontrol.trans_exc_code =
4157                                                 current->thread.gmap_addr;
4158                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
4159                 return -EREMOTE;
4160         } else if (current->thread.gmap_pfault) {
4161                 trace_kvm_s390_major_guest_pfault(vcpu);
4162                 current->thread.gmap_pfault = 0;
4163                 if (kvm_arch_setup_async_pf(vcpu))
4164                         return 0;
4165                 vcpu->stat.pfault_sync++;
4166                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
4167         }
4168         return vcpu_post_run_fault_in_sie(vcpu);
4169 }
4170
4171 #define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
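/*
 * Outer run loop: deliver pending interrupts and requests, enter SIE with
 * guest time accounting, and post-process each exit until an error, a
 * pending signal or a pending guest debug exit stops the loop. kvm->srcu
 * is held except while the guest is actually running.
 */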
4172 static int __vcpu_run(struct kvm_vcpu *vcpu)
4173 {
4174         int rc, exit_reason;
4175         struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
4176
4177         /*
4178          * We try to hold kvm->srcu during most of vcpu_run (except while
4179          * running the guest), so that memslots and similar data are protected
4180          */
4181         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4182
4183         do {
4184                 rc = vcpu_pre_run(vcpu);
4185                 if (rc)
4186                         break;
4187
4188                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4189                 /*
4190                  * As PF_VCPU will be used in the fault handler, there must
4191                  * be no uaccess between guest_enter and guest_exit.
4192                  */
4193                 local_irq_disable();
4194                 guest_enter_irqoff();
4195                 __disable_cpu_timer_accounting(vcpu);
4196                 local_irq_enable();
4197                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4198                         memcpy(sie_page->pv_grregs,
4199                                vcpu->run->s.regs.gprs,
4200                                sizeof(sie_page->pv_grregs));
4201                 }
4202                 if (test_cpu_flag(CIF_FPU))
4203                         load_fpu_regs();
4204                 exit_reason = sie64a(vcpu->arch.sie_block,
4205                                      vcpu->run->s.regs.gprs);
4206                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4207                         memcpy(vcpu->run->s.regs.gprs,
4208                                sie_page->pv_grregs,
4209                                sizeof(sie_page->pv_grregs));
4210                         /*
4211                          * We're not allowed to inject interrupts on intercepts
4212                          * that leave the guest state in an "in-between" state
4213                          * where the next SIE entry will do a continuation.
4214                          * Fence interrupts in our "internal" PSW.
4215                          */
4216                         if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
4217                             vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
4218                                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4219                         }
4220                 }
4221                 local_irq_disable();
4222                 __enable_cpu_timer_accounting(vcpu);
4223                 guest_exit_irqoff();
4224                 local_irq_enable();
4225                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4226
4227                 rc = vcpu_post_run(vcpu, exit_reason);
4228         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
4229
4230         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
4231         return rc;
4232 }
4233
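/*
 * Sync the fmt2-only (i.e. non-protected guest) state from kvm_run into
 * the SIE control block, honouring the kvm_dirty_regs bits set by
 * userspace, and load the guest GS control block if enabled.
 */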
4234 static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
4235 {
4236         struct kvm_run *kvm_run = vcpu->run;
4237         struct runtime_instr_cb *riccb;
4238         struct gs_cb *gscb;
4239
4240         riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
4241         gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
4242         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
4243         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
4244         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4245                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
4246                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
4247                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
4248         }
4249         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
4250                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
4251                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
4252                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
4253                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
4254                         kvm_clear_async_pf_completion_queue(vcpu);
4255         }
4256         if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
4257                 vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
4258                 vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
4259         }
4260         /*
4261          * If userspace sets the riccb (e.g. after migration) to a valid state,
4262          * we should enable RI here instead of doing the lazy enablement.
4263          */
4264         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
4265             test_kvm_facility(vcpu->kvm, 64) &&
4266             riccb->v &&
4267             !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
4268                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
4269                 vcpu->arch.sie_block->ecb3 |= ECB3_RI;
4270         }
4271         /*
4272          * If userspace sets the gscb (e.g. after migration) to non-zero,
4273          * we should enable GS here instead of doing the lazy enablement.
4274          */
4275         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
4276             test_kvm_facility(vcpu->kvm, 133) &&
4277             gscb->gssm &&
4278             !vcpu->arch.gs_enabled) {
4279                 VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
4280                 vcpu->arch.sie_block->ecb |= ECB_GS;
4281                 vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
4282                 vcpu->arch.gs_enabled = 1;
4283         }
4284         if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
4285             test_kvm_facility(vcpu->kvm, 82)) {
4286                 vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
4287                 vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
4288         }
4289         if (MACHINE_HAS_GS) {
4290                 preempt_disable();
4291                 __ctl_set_bit(2, 4);
4292                 if (current->thread.gs_cb) {
4293                         vcpu->arch.host_gscb = current->thread.gs_cb;
4294                         save_gs_cb(vcpu->arch.host_gscb);
4295                 }
4296                 if (vcpu->arch.gs_enabled) {
4297                         current->thread.gs_cb = (struct gs_cb *)
4298                                                 &vcpu->run->s.regs.gscb;
4299                         restore_gs_cb(current->thread.gs_cb);
4300                 }
4301                 preempt_enable();
4302         }
4303         /* SIE will load etoken directly from SDNX and therefore kvm_run */
4304 }
4305
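/*
 * Sync register state from kvm_run into the vcpu before entering SIE.
 * Userspace marks what it modified via kvm_run->kvm_dirty_regs; an
 * illustrative userspace sketch (not kernel code) of updating the prefix:
 *
 *	run->s.regs.prefix = new_prefix;
 *	run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
 *	ioctl(vcpu_fd, KVM_RUN, 0);
 */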
4306 static void sync_regs(struct kvm_vcpu *vcpu)
4307 {
4308         struct kvm_run *kvm_run = vcpu->run;
4309
4310         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
4311                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
4312         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
4313                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
4314                 /* some control register changes require a tlb flush */
4315                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4316         }
4317         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
4318                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
4319                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
4320         }
4321         save_access_regs(vcpu->arch.host_acrs);
4322         restore_access_regs(vcpu->run->s.regs.acrs);
4323         /* save host (userspace) fprs/vrs */
4324         save_fpu_regs();
4325         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
4326         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
4327         if (MACHINE_HAS_VX)
4328                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
4329         else
4330                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
4331         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
4332         if (test_fp_ctl(current->thread.fpu.fpc))
4333                 /* User space provided an invalid FPC, let's clear it */
4334                 current->thread.fpu.fpc = 0;
4335
4336         /* Sync fmt2 only data */
4337         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
4338                 sync_regs_fmt2(vcpu);
4339         } else {
4340                 /*
4341                  * In several places we have to modify our internal view to
4342                  * not do things that are disallowed by the ultravisor. For
4343                  * example we must not inject interrupts after specific exits
4344                  * (e.g. intercept 112, prefix page not secure). We do this by
4345                  * turning off the machine check, external and I/O interrupt
4346                  * bits of our PSW copy. To avoid getting validity intercepts,
4347                  * we only accept the condition code from userspace.
4348                  */
4349                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
4350                 vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
4351                                                    PSW_MASK_CC;
4352         }
4353
4354         kvm_run->kvm_dirty_regs = 0;
4355 }
4356
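/*
 * Store the fmt2-only (i.e. non-protected guest) state back into kvm_run
 * and restore the host GS control block.
 */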
4357 static void store_regs_fmt2(struct kvm_vcpu *vcpu)
4358 {
4359         struct kvm_run *kvm_run = vcpu->run;
4360
4361         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
4362         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
4363         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
4364         kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
4365         kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
4366         if (MACHINE_HAS_GS) {
4367                 preempt_disable();
4368                 __ctl_set_bit(2, 4);
4369                 if (vcpu->arch.gs_enabled)
4370                         save_gs_cb(current->thread.gs_cb);
4371                 current->thread.gs_cb = vcpu->arch.host_gscb;
4372                 restore_gs_cb(vcpu->arch.host_gscb);
4373                 if (!vcpu->arch.host_gscb)
4374                         __ctl_clear_bit(2, 4);
4375                 vcpu->arch.host_gscb = NULL;
4376                 preempt_enable();
4377         }
4378         /* SIE will save etoken directly into SDNX and therefore kvm_run */
4379 }
4380
4381 static void store_regs(struct kvm_vcpu *vcpu)
4382 {
4383         struct kvm_run *kvm_run = vcpu->run;
4384
4385         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
4386         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
4387         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
4388         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
4389         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
4390         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
4391         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
4392         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
4393         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
4394         save_access_regs(vcpu->run->s.regs.acrs);
4395         restore_access_regs(vcpu->arch.host_acrs);
4396         /* Save guest register state */
4397         save_fpu_regs();
4398         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4399         /* Restore will be done lazily at return */
4400         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
4401         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
4402         if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
4403                 store_regs_fmt2(vcpu);
4404 }
4405
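/*
 * The KVM_RUN ioctl: sync registers in, run the vcpu until an exit that
 * needs userspace attention, then store registers back into kvm_run.
 * A minimal userspace sketch (illustrative only, error handling omitted;
 * mmap_size comes from KVM_GET_VCPU_MMAP_SIZE):
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *	while (!ioctl(vcpu_fd, KVM_RUN, 0)) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			break;	/* hand the intercept to the VMM */
 *	}
 */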
4406 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
4407 {
4408         struct kvm_run *kvm_run = vcpu->run;
4409         int rc;
4410
4411         if (kvm_run->immediate_exit)
4412                 return -EINTR;
4413
4414         if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
4415             kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS)
4416                 return -EINVAL;
4417
4418         vcpu_load(vcpu);
4419
4420         if (guestdbg_exit_pending(vcpu)) {
4421                 kvm_s390_prepare_debug_exit(vcpu);
4422                 rc = 0;
4423                 goto out;
4424         }
4425
4426         kvm_sigset_activate(vcpu);
4427
4428         /*
4429          * No need to check the return value of kvm_s390_vcpu_start() as it can
4430          * only fail for protvirt, and protvirt implies user-controlled cpu state
4431          */
4432         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
4433                 kvm_s390_vcpu_start(vcpu);
4434         } else if (is_vcpu_stopped(vcpu)) {
4435                 pr_err_ratelimited("can't run stopped vcpu %d\n",
4436                                    vcpu->vcpu_id);
4437                 rc = -EINVAL;
4438                 goto out;
4439         }
4440
4441         sync_regs(vcpu);
4442         enable_cpu_timer_accounting(vcpu);
4443
4444         might_fault();
4445         rc = __vcpu_run(vcpu);
4446
4447         if (signal_pending(current) && !rc) {
4448                 kvm_run->exit_reason = KVM_EXIT_INTR;
4449                 rc = -EINTR;
4450         }
4451
4452         if (guestdbg_exit_pending(vcpu) && !rc)  {
4453                 kvm_s390_prepare_debug_exit(vcpu);
4454                 rc = 0;
4455         }
4456
4457         if (rc == -EREMOTE) {
4458                 /* userspace support is needed, kvm_run has been prepared */
4459                 rc = 0;
4460         }
4461
4462         disable_cpu_timer_accounting(vcpu);
4463         store_regs(vcpu);
4464
4465         kvm_sigset_deactivate(vcpu);
4466
4467         vcpu->stat.exit_userspace++;
4468 out:
4469         vcpu_put(vcpu);
4470         return rc;
4471 }
4472
4473 /*
4474  * store status at address
4475  * we have two special cases:
4476  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
4477  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
4478  */
4479 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
4480 {
4481         unsigned char archmode = 1;
4482         freg_t fprs[NUM_FPRS];
4483         unsigned int px;
4484         u64 clkcomp, cputm;
4485         int rc;
4486
4487         px = kvm_s390_get_prefix(vcpu);
4488         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
4489                 if (write_guest_abs(vcpu, 163, &archmode, 1))
4490                         return -EFAULT;
4491                 gpa = 0;
4492         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
4493                 if (write_guest_real(vcpu, 163, &archmode, 1))
4494                         return -EFAULT;
4495                 gpa = px;
4496         } else
4497                 gpa -= __LC_FPREGS_SAVE_AREA;
4498
4499         /* manually convert vector registers if necessary */
4500         if (MACHINE_HAS_VX) {
4501                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
4502                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4503                                      fprs, 128);
4504         } else {
4505                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
4506                                      vcpu->run->s.regs.fprs, 128);
4507         }
4508         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
4509                               vcpu->run->s.regs.gprs, 128);
4510         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
4511                               &vcpu->arch.sie_block->gpsw, 16);
4512         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
4513                               &px, 4);
4514         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
4515                               &vcpu->run->s.regs.fpc, 4);
4516         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
4517                               &vcpu->arch.sie_block->todpr, 4);
4518         cputm = kvm_s390_get_cpu_timer(vcpu);
4519         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
4520                               &cputm, 8);
4521         clkcomp = vcpu->arch.sie_block->ckc >> 8;
4522         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
4523                               &clkcomp, 8);
4524         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
4525                               &vcpu->run->s.regs.acrs, 64);
4526         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
4527                               &vcpu->arch.sie_block->gcr, 128);
4528         return rc ? -EFAULT : 0;
4529 }
4530
4531 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
4532 {
4533         /*
4534          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
4535          * switch in the run ioctl. Let's update our copies before we store
4536          * them into the save area.
4537          */
4538         save_fpu_regs();
4539         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
4540         save_access_regs(vcpu->run->s.regs.acrs);
4541
4542         return kvm_s390_store_status_unloaded(vcpu, addr);
4543 }
4544
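/*
 * IBS helpers: IBS is a SIE facility (advertised via sclp.has_ibs) that
 * speeds up a guest while only a single VCPU is started; the start/stop
 * paths below enable it for a lone VCPU and disable it otherwise.
 */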
4545 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4546 {
4547         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
4548         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
4549 }
4550
4551 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
4552 {
4553         unsigned int i;
4554         struct kvm_vcpu *vcpu;
4555
4556         kvm_for_each_vcpu(i, vcpu, kvm) {
4557                 __disable_ibs_on_vcpu(vcpu);
4558         }
4559 }
4560
4561 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
4562 {
4563         if (!sclp.has_ibs)
4564                 return;
4565         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
4566         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
4567 }
4568
4569 int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
4570 {
4571         int i, online_vcpus, r = 0, started_vcpus = 0;
4572
4573         if (!is_vcpu_stopped(vcpu))
4574                 return 0;
4575
4576         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
4577         /* Only one cpu at a time may enter/leave the STOPPED state. */
4578         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4579         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4580
4581         /* Let's tell the UV that we want to change into the operating state */
4582         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4583                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
4584                 if (r) {
4585                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4586                         return r;
4587                 }
4588         }
4589
4590         for (i = 0; i < online_vcpus; i++) {
4591                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
4592                         started_vcpus++;
4593         }
4594
4595         if (started_vcpus == 0) {
4596                 /* we're the only active VCPU -> speed it up */
4597                 __enable_ibs_on_vcpu(vcpu);
4598         } else if (started_vcpus == 1) {
4599                 /*
4600                  * As we are starting a second VCPU, we have to disable
4601                  * the IBS facility on all VCPUs to remove potentially
4602                  * outstanding ENABLE requests.
4603                  */
4604                 __disable_ibs_on_all_vcpus(vcpu->kvm);
4605         }
4606
4607         kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
4608         /*
4609          * The real PSW might have changed due to a RESTART interpreted by the
4610          * ultravisor. We block all interrupts and let the next sie exit
4611          * refresh our view.
4612          */
4613         if (kvm_s390_pv_cpu_is_protected(vcpu))
4614                 vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
4615         /*
4616          * Another VCPU might have used IBS while we were offline.
4617          * Let's play safe and flush the VCPU at startup.
4618          */
4619         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
4620         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4621         return 0;
4622 }
4623
4624 int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
4625 {
4626         int i, online_vcpus, r = 0, started_vcpus = 0;
4627         struct kvm_vcpu *started_vcpu = NULL;
4628
4629         if (is_vcpu_stopped(vcpu))
4630                 return 0;
4631
4632         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
4633         /* Only one cpu at a time may enter/leave the STOPPED state. */
4634         spin_lock(&vcpu->kvm->arch.start_stop_lock);
4635         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
4636
4637         /* Let's tell the UV that we want to change into the stopped state */
4638         if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4639                 r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
4640                 if (r) {
4641                         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4642                         return r;
4643                 }
4644         }
4645
4646         /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
4647         kvm_s390_clear_stop_irq(vcpu);
4648
4649         kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
4650         __disable_ibs_on_vcpu(vcpu);
4651
4652         for (i = 0; i < online_vcpus; i++) {
4653                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
4654                         started_vcpus++;
4655                         started_vcpu = vcpu->kvm->vcpus[i];
4656                 }
4657         }
4658
4659         if (started_vcpus == 1) {
4660                 /*
4661                  * As we only have one VCPU left, we want to enable the
4662                  * IBS facility for that VCPU to speed it up.
4663                  */
4664                 __enable_ibs_on_vcpu(started_vcpu);
4665         }
4666
4667         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
4668         return 0;
4669 }
4670
4671 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
4672                                      struct kvm_enable_cap *cap)
4673 {
4674         int r;
4675
4676         if (cap->flags)
4677                 return -EINVAL;
4678
4679         switch (cap->cap) {
4680         case KVM_CAP_S390_CSS_SUPPORT:
4681                 if (!vcpu->kvm->arch.css_support) {
4682                         vcpu->kvm->arch.css_support = 1;
4683                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
4684                         trace_kvm_s390_enable_css(vcpu->kvm);
4685                 }
4686                 r = 0;
4687                 break;
4688         default:
4689                 r = -EINVAL;
4690                 break;
4691         }
4692         return r;
4693 }
4694
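/*
 * Handle KVM_S390_MEMOP_SIDA_{READ,WRITE} for protected guests: copy
 * between userspace and the vcpu's SIDA, the instruction data area
 * attached to the SIE control block.
 */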
4695 static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
4696                                    struct kvm_s390_mem_op *mop)
4697 {
4698         void __user *uaddr = (void __user *)mop->buf;
4699         int r = 0;
4700
4701         if (mop->flags || !mop->size)
4702                 return -EINVAL;
4703         if (mop->size + mop->sida_offset < mop->size)
4704                 return -EINVAL;
4705         if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
4706                 return -E2BIG;
4707
4708         switch (mop->op) {
4709         case KVM_S390_MEMOP_SIDA_READ:
4710                 if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
4711                                  mop->sida_offset), mop->size))
4712                         r = -EFAULT;
4713
4714                 break;
4715         case KVM_S390_MEMOP_SIDA_WRITE:
4716                 if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
4717                                    mop->sida_offset), uaddr, mop->size))
4718                         r = -EFAULT;
4719                 break;
4720         }
4721         return r;
4722 }
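
/*
 * Handle KVM_S390_MEMOP_LOGICAL_{READ,WRITE}: access guest memory at a
 * logical address in the given access register mode (mop->ar), either for
 * real through a bounce buffer or, with KVM_S390_MEMOP_F_CHECK_ONLY, only
 * checking whether the access would succeed.
 */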
4723 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
4724                                   struct kvm_s390_mem_op *mop)
4725 {
4726         void __user *uaddr = (void __user *)mop->buf;
4727         void *tmpbuf = NULL;
4728         int r = 0;
4729         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
4730                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
4731
4732         if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
4733                 return -EINVAL;
4734
4735         if (mop->size > MEM_OP_MAX_SIZE)
4736                 return -E2BIG;
4737
4738         if (kvm_s390_pv_cpu_is_protected(vcpu))
4739                 return -EINVAL;
4740
4741         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
4742                 tmpbuf = vmalloc(mop->size);
4743                 if (!tmpbuf)
4744                         return -ENOMEM;
4745         }
4746
4747         switch (mop->op) {
4748         case KVM_S390_MEMOP_LOGICAL_READ:
4749                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4750                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4751                                             mop->size, GACC_FETCH);
4752                         break;
4753                 }
4754                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4755                 if (r == 0) {
4756                         if (copy_to_user(uaddr, tmpbuf, mop->size))
4757                                 r = -EFAULT;
4758                 }
4759                 break;
4760         case KVM_S390_MEMOP_LOGICAL_WRITE:
4761                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
4762                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
4763                                             mop->size, GACC_STORE);
4764                         break;
4765                 }
4766                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
4767                         r = -EFAULT;
4768                         break;
4769                 }
4770                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
4771                 break;
4772         }
4773
4774         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
4775                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
4776
4777         vfree(tmpbuf);
4778         return r;
4779 }
4780
4781 static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
4782                                       struct kvm_s390_mem_op *mop)
4783 {
4784         int r, srcu_idx;
4785
4786         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
4787
4788         switch (mop->op) {
4789         case KVM_S390_MEMOP_LOGICAL_READ:
4790         case KVM_S390_MEMOP_LOGICAL_WRITE:
4791                 r = kvm_s390_guest_mem_op(vcpu, mop);
4792                 break;
4793         case KVM_S390_MEMOP_SIDA_READ:
4794         case KVM_S390_MEMOP_SIDA_WRITE:
4795                 /* the vcpu->mutex protects us against the SIDA going away */
4796                 r = kvm_s390_guest_sida_op(vcpu, mop);
4797                 break;
4798         default:
4799                 r = -EINVAL;
4800         }
4801
4802         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
4803         return r;
4804 }
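
/*
 * Illustrative userspace sketch of a guest memory read through the
 * KVM_S390_MEM_OP vcpu ioctl handled above (not kernel code; vcpu_fd,
 * guest_addr, len and buffer are assumptions of the example):
 *
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,
 *		.size  = len,
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)(uintptr_t)buffer,
 *		.ar    = 0,
 *	};
 *	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &op) < 0)
 *		err(1, "KVM_S390_MEM_OP");
 */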
4805
4806 long kvm_arch_vcpu_async_ioctl(struct file *filp,
4807                                unsigned int ioctl, unsigned long arg)
4808 {
4809         struct kvm_vcpu *vcpu = filp->private_data;
4810         void __user *argp = (void __user *)arg;
4811
4812         switch (ioctl) {
4813         case KVM_S390_IRQ: {
4814                 struct kvm_s390_irq s390irq;
4815
4816                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
4817                         return -EFAULT;
4818                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4819         }
4820         case KVM_S390_INTERRUPT: {
4821                 struct kvm_s390_interrupt s390int;
4822                 struct kvm_s390_irq s390irq = {};
4823
4824                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
4825                         return -EFAULT;
4826                 if (s390int_to_s390irq(&s390int, &s390irq))
4827                         return -EINVAL;
4828                 return kvm_s390_inject_vcpu(vcpu, &s390irq);
4829         }
4830         }
4831         return -ENOIOCTLCMD;
4832 }
4833
4834 long kvm_arch_vcpu_ioctl(struct file *filp,
4835                          unsigned int ioctl, unsigned long arg)
4836 {
4837         struct kvm_vcpu *vcpu = filp->private_data;
4838         void __user *argp = (void __user *)arg;
4839         int idx;
4840         long r;
4841         u16 rc, rrc;
4842
4843         vcpu_load(vcpu);
4844
4845         switch (ioctl) {
4846         case KVM_S390_STORE_STATUS:
4847                 idx = srcu_read_lock(&vcpu->kvm->srcu);
4848                 r = kvm_s390_store_status_unloaded(vcpu, arg);
4849                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
4850                 break;
4851         case KVM_S390_SET_INITIAL_PSW: {
4852                 psw_t psw;
4853
4854                 r = -EFAULT;
4855                 if (copy_from_user(&psw, argp, sizeof(psw)))
4856                         break;
4857                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
4858                 break;
4859         }
4860         case KVM_S390_CLEAR_RESET:
4861                 r = 0;
4862                 kvm_arch_vcpu_ioctl_clear_reset(vcpu);
4863                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4864                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4865                                           UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
4866                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
4867                                    rc, rrc);
4868                 }
4869                 break;
4870         case KVM_S390_INITIAL_RESET:
4871                 r = 0;
4872                 kvm_arch_vcpu_ioctl_initial_reset(vcpu);
4873                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4874                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4875                                           UVC_CMD_CPU_RESET_INITIAL,
4876                                           &rc, &rrc);
4877                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
4878                                    rc, rrc);
4879                 }
4880                 break;
4881         case KVM_S390_NORMAL_RESET:
4882                 r = 0;
4883                 kvm_arch_vcpu_ioctl_normal_reset(vcpu);
4884                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
4885                         r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
4886                                           UVC_CMD_CPU_RESET, &rc, &rrc);
4887                         VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
4888                                    rc, rrc);
4889                 }
4890                 break;
4891         case KVM_SET_ONE_REG:
4892         case KVM_GET_ONE_REG: {
4893                 struct kvm_one_reg reg;
4894                 r = -EINVAL;
4895                 if (kvm_s390_pv_cpu_is_protected(vcpu))
4896                         break;
4897                 r = -EFAULT;
4898                 if (copy_from_user(&reg, argp, sizeof(reg)))
4899                         break;
4900                 if (ioctl == KVM_SET_ONE_REG)
4901                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
4902                 else
4903                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
4904                 break;
4905         }
4906 #ifdef CONFIG_KVM_S390_UCONTROL
4907         case KVM_S390_UCAS_MAP: {
4908                 struct kvm_s390_ucas_mapping ucasmap;
4909
4910                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4911                         r = -EFAULT;
4912                         break;
4913                 }
4914
4915                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4916                         r = -EINVAL;
4917                         break;
4918                 }
4919
4920                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
4921                                      ucasmap.vcpu_addr, ucasmap.length);
4922                 break;
4923         }
4924         case KVM_S390_UCAS_UNMAP: {
4925                 struct kvm_s390_ucas_mapping ucasmap;
4926
4927                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
4928                         r = -EFAULT;
4929                         break;
4930                 }
4931
4932                 if (!kvm_is_ucontrol(vcpu->kvm)) {
4933                         r = -EINVAL;
4934                         break;
4935                 }
4936
4937                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
4938                         ucasmap.length);
4939                 break;
4940         }
4941 #endif
4942         case KVM_S390_VCPU_FAULT: {
4943                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
4944                 break;
4945         }
4946         case KVM_ENABLE_CAP:
4947         {
4948                 struct kvm_enable_cap cap;
4949                 r = -EFAULT;
4950                 if (copy_from_user(&cap, argp, sizeof(cap)))
4951                         break;
4952                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
4953                 break;
4954         }
4955         case KVM_S390_MEM_OP: {
4956                 struct kvm_s390_mem_op mem_op;
4957
4958                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
4959                         r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
4960                 else
4961                         r = -EFAULT;
4962                 break;
4963         }
4964         case KVM_S390_SET_IRQ_STATE: {
4965                 struct kvm_s390_irq_state irq_state;
4966
4967                 r = -EFAULT;
4968                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4969                         break;
4970                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
4971                     irq_state.len == 0 ||
4972                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
4973                         r = -EINVAL;
4974                         break;
4975                 }
4976                 /* do not use irq_state.flags, it will break old QEMUs */
4977                 r = kvm_s390_set_irq_state(vcpu,
4978                                            (void __user *) irq_state.buf,
4979                                            irq_state.len);
4980                 break;
4981         }
4982         case KVM_S390_GET_IRQ_STATE: {
4983                 struct kvm_s390_irq_state irq_state;
4984
4985                 r = -EFAULT;
4986                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
4987                         break;
4988                 if (irq_state.len == 0) {
4989                         r = -EINVAL;
4990                         break;
4991                 }
4992                 /* do not use irq_state.flags, it will break old QEMUs */
4993                 r = kvm_s390_get_irq_state(vcpu,
4994                                            (__u8 __user *)  irq_state.buf,
4995                                            irq_state.len);
4996                 break;
4997         }
4998         default:
4999                 r = -ENOTTY;
5000         }
5001
5002         vcpu_put(vcpu);
5003         return r;
5004 }
5005
5006 vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
5007 {
5008 #ifdef CONFIG_KVM_S390_UCONTROL
5009         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
5010                  && (kvm_is_ucontrol(vcpu->kvm))) {
5011                 vmf->page = virt_to_page(vcpu->arch.sie_block);
5012                 get_page(vmf->page);
5013                 return 0;
5014         }
5015 #endif
5016         return VM_FAULT_SIGBUS;
5017 }
5018
5019 /* Section: memory related */
5020 int kvm_arch_prepare_memory_region(struct kvm *kvm,
5021                                    struct kvm_memory_slot *memslot,
5022                                    const struct kvm_userspace_memory_region *mem,
5023                                    enum kvm_mr_change change)
5024 {
5025         /* A few sanity checks. Memory slots have to start and end at a
5026            segment boundary (1 MB). The memory in userland may be fragmented
5027            into various different VMAs. It is fine to mmap() and munmap()
5028            in this slot at any time after making this call. */
5029
5030         if (mem->userspace_addr & 0xffffful)
5031                 return -EINVAL;
5032
5033         if (mem->memory_size & 0xffffful)
5034                 return -EINVAL;
5035
5036         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
5037                 return -EINVAL;
5038
5039         /* When we are protected, we should not change the memory slots */
5040         if (kvm_s390_pv_get_handle(kvm))
5041                 return -EINVAL;
5042         return 0;
5043 }
5044
5045 void kvm_arch_commit_memory_region(struct kvm *kvm,
5046                                 const struct kvm_userspace_memory_region *mem,
5047                                 struct kvm_memory_slot *old,
5048                                 const struct kvm_memory_slot *new,
5049                                 enum kvm_mr_change change)
5050 {
5051         int rc = 0;
5052
5053         switch (change) {
5054         case KVM_MR_DELETE:
5055                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5056                                         old->npages * PAGE_SIZE);
5057                 break;
5058         case KVM_MR_MOVE:
5059                 rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
5060                                         old->npages * PAGE_SIZE);
5061                 if (rc)
5062                         break;
5063                 fallthrough;
5064         case KVM_MR_CREATE:
5065                 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
5066                                       mem->guest_phys_addr, mem->memory_size);
5067                 break;
5068         case KVM_MR_FLAGS_ONLY:
5069                 break;
5070         default:
5071                 WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
5072         }
5073         if (rc)
5074                 pr_warn("failed to commit memory region\n");
5075         return;
5076 }
5077
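/*
 * Extract the 2-bit field for facility word i from sclp.hmfai and turn it
 * into a mask of facility bits to keep: each increment of the field shifts
 * the 48-bit base mask right by 16 bits (e.g. a field value of 1 yields
 * 0x00000000ffffffff). Used below to trim the host facility list when
 * seeding kvm_s390_fac_base.
 */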
5078 static inline unsigned long nonhyp_mask(int i)
5079 {
5080         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
5081
5082         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
5083 }
5084
5085 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
5086 {
5087         vcpu->valid_wakeup = false;
5088 }
5089
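/*
 * Module init: refuse to load when SIE is unavailable (sclp.has_sief2),
 * reject the incompatible nested + hpage module parameter combination,
 * and seed kvm_s390_fac_base from the host facility list.
 */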
5090 static int __init kvm_s390_init(void)
5091 {
5092         int i;
5093
5094         if (!sclp.has_sief2) {
5095                 pr_info("SIE is not available\n");
5096                 return -ENODEV;
5097         }
5098
5099         if (nested && hpage) {
5100                 pr_info("A KVM host that supports nesting cannot back its KVM guests with huge pages\n");
5101                 return -EINVAL;
5102         }
5103
5104         for (i = 0; i < 16; i++)
5105                 kvm_s390_fac_base[i] |=
5106                         stfle_fac_list[i] & nonhyp_mask(i);
5107
5108         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
5109 }
5110
5111 static void __exit kvm_s390_exit(void)
5112 {
5113         kvm_exit();
5114 }
5115
5116 module_init(kvm_s390_init);
5117 module_exit(kvm_s390_exit);
5118
5119 /*
5120  * Enable autoloading of the kvm module.
5121  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
5122  * since x86 takes a different approach.
5123  */
5124 #include <linux/miscdevice.h>
5125 MODULE_ALIAS_MISCDEV(KVM_MINOR);
5126 MODULE_ALIAS("devname:kvm");