// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012,2013 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * Derived from arch/arm/kvm/reset.c
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hw_breakpoint.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

#include <kvm/arm_arch_timer.h>

#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/fpsimd.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/virt.h>

/* Maximum phys_shift supported for any VM on this host */
static u32 kvm_ipa_limit;

/*
 * ARMv8 Reset Values
 */
#define VCPU_RESET_PSTATE_EL1	(PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \
				 PSR_F_BIT | PSR_D_BIT)

#define VCPU_RESET_PSTATE_SVC	(PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \
				 PSR_AA32_I_BIT | PSR_AA32_F_BIT)

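/*
 * Note: both reset values enter the guest with asynchronous aborts and
 * IRQ/FIQ masked, i.e. EL1h with DAIF set for AArch64, or AArch32 SVC
 * mode with the A/I/F bits set.
 */
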
/**
 * kvm_arch_vm_ioctl_check_extension
 *
 * We currently assume that the number of HW registers is uniform
 * across all CPUs (see cpuinfo_sanity_check).
 */
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_ARM_EL1_32BIT:
		r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1);
		break;
	case KVM_CAP_GUEST_DEBUG_HW_BPS:
		r = get_num_brps();
		break;
	case KVM_CAP_GUEST_DEBUG_HW_WPS:
		r = get_num_wrps();
		break;
	case KVM_CAP_ARM_PMU_V3:
		r = kvm_arm_support_pmu_v3();
		break;
	case KVM_CAP_ARM_INJECT_SERROR_ESR:
		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
		break;
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_VCPU_ATTRIBUTES:
		r = 1;
		break;
	case KVM_CAP_ARM_VM_IPA_SIZE:
		r = kvm_ipa_limit;
		break;
	case KVM_CAP_ARM_SVE:
		r = system_supports_sve();
		break;
	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
	case KVM_CAP_ARM_PTRAUTH_GENERIC:
		r = has_vhe() && system_supports_address_auth() &&
		    system_supports_generic_auth();
		break;
	default:
		r = 0;
	}

	return r;
}

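/*
 * Illustrative userspace usage (not part of this file): capabilities are
 * probed with the KVM_CHECK_EXTENSION ioctl, e.g. a VMM might do
 *
 *	int ipa_bits = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
 *
 * which lands here and returns kvm_ipa_limit (0 if the capability is absent).
 */
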
unsigned int kvm_sve_max_vl;

int kvm_arm_init_sve(void)
{
	if (system_supports_sve()) {
		kvm_sve_max_vl = sve_max_virtualisable_vl;

		/*
		 * The get_sve_reg()/set_sve_reg() ioctl interface will need
		 * to be extended with multiple register slice support in
		 * order to support vector lengths greater than
		 * SVE_VL_ARCH_MAX:
		 */
		if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX))
			kvm_sve_max_vl = SVE_VL_ARCH_MAX;

		/*
		 * Don't even try to make use of vector lengths that
		 * aren't available on all CPUs, for now:
		 */
		if (kvm_sve_max_vl < sve_max_vl)
			pr_warn("KVM: SVE vector length for guests limited to %u bytes\n",
				kvm_sve_max_vl);
	}

	return 0;
}

static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
{
	if (!system_supports_sve())
		return -EINVAL;

	/* Verify that KVM startup enforced this when SVE was detected: */
	if (WARN_ON(!has_vhe()))
		return -EINVAL;

	vcpu->arch.sve_max_vl = kvm_sve_max_vl;

	/*
	 * Userspace can still customize the vector lengths by writing
	 * KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
	 * kvm_arm_vcpu_finalize(), which freezes the configuration.
	 */
	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;

	return 0;
}

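/*
 * Illustrative userspace flow (a sketch, not part of this file): SVE is
 * requested at vcpu init and must then be finalized before the vcpu runs:
 *
 *	init.features[0] |= 1 << KVM_ARM_VCPU_SVE;
 *	ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
 *	// optionally write KVM_REG_ARM64_SVE_VLS here to shrink the VL set
 *	int feature = KVM_ARM_VCPU_SVE;
 *	ioctl(vcpu_fd, KVM_ARM_VCPU_FINALIZE, &feature);
 */
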
/*
 * Finalize vcpu's maximum SVE vector length, allocating
 * vcpu->arch.sve_state as necessary.
 */
static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
{
	void *buf;
	unsigned int vl;

	vl = vcpu->arch.sve_max_vl;

	/*
	 * Responsibility for these properties is shared between
	 * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
	 * set_sve_vls(). Double-check here just to be sure:
	 */
	if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl ||
		    vl > SVE_VL_ARCH_MAX))
		return -EIO;

	buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vcpu->arch.sve_state = buf;
	vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
	return 0;
}

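/*
 * Sizing note (illustrative): SVE_SIG_REGS_SIZE(vq) covers the 32 Z
 * registers (vl bytes each) plus the 16 predicate registers and FFR
 * (vl/8 bytes each). For example, vl = 256 bytes (2048-bit vectors,
 * vq = 16) needs 32 * 256 + 17 * 32 = 8736 bytes of sve_state.
 */
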
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
{
	switch (feature) {
	case KVM_ARM_VCPU_SVE:
		if (!vcpu_has_sve(vcpu))
			return -EINVAL;

		if (kvm_arm_vcpu_sve_finalized(vcpu))
			return -EPERM;

		return kvm_vcpu_finalize_sve(vcpu);
	}

	return -EINVAL;
}

bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu))
		return false;

	return true;
}

void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kfree(vcpu->arch.sve_state);
}

static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_sve(vcpu))
		memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
}

static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
	/* Support ptrauth only if the system supports these capabilities. */
	if (!has_vhe())
		return -EINVAL;

	if (!system_supports_address_auth() ||
	    !system_supports_generic_auth())
		return -EINVAL;

	/*
	 * For now make sure that both address/generic pointer authentication
	 * features are requested by the userspace together.
	 */
	if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
	    !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features))
		return -EINVAL;

	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
	return 0;
}

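/*
 * Illustrative userspace usage (a sketch): both ptrauth features must be
 * requested together at vcpu init, e.g.
 *
 *	init.features[0] |= (1 << KVM_ARM_VCPU_PTRAUTH_ADDRESS) |
 *			    (1 << KVM_ARM_VCPU_PTRAUTH_GENERIC);
 *
 * Requesting only one of the two makes kvm_vcpu_enable_ptrauth() fail,
 * and KVM_ARM_VCPU_INIT then returns -EINVAL.
 */
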
/**
 * kvm_reset_vcpu - sets core registers and sys_regs to reset value
 * @vcpu: The VCPU pointer
 *
 * This function finds the right table above and sets the registers on
 * the virtual CPU struct to their architecturally defined reset
 * values, except for registers whose reset is deferred until
 * kvm_arm_vcpu_finalize().
 *
 * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
 * ioctl or as part of handling a request issued by another VCPU in the PSCI
 * handling code. In the first case, the VCPU will not be loaded, and in the
 * second case the VCPU will be loaded. Because this function operates purely
 * on the memory-backed values of system registers, we want to do a full put if
 * we were loaded (handling a request) and load the values back at the end of
 * the function. Otherwise we leave the state alone. In both cases, we
 * disable preemption around the vcpu reset as we would otherwise race with
 * preempt notifiers which also call put/load.
 */
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
	int ret = -EINVAL;
	bool loaded;
	u32 pstate;

	/* Reset PMU outside of the non-preemptible section */
	kvm_pmu_vcpu_reset(vcpu);

	preempt_disable();
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

	if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
		if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
			ret = kvm_vcpu_enable_sve(vcpu);
			if (ret)
				goto out;
		}
	} else {
		kvm_vcpu_reset_sve(vcpu);
	}

	if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
	    test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
		if (kvm_vcpu_enable_ptrauth(vcpu))
			goto out;
	}

	switch (vcpu->arch.target) {
	default:
		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
			if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1))
				goto out;
			pstate = VCPU_RESET_PSTATE_SVC;
		} else {
			pstate = VCPU_RESET_PSTATE_EL1;
		}
		break;
	}

	/* Reset core registers */
	memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
	vcpu_gp_regs(vcpu)->regs.pstate = pstate;

	/* Reset system registers */
	kvm_reset_sys_regs(vcpu);

	/*
	 * Additional reset state handling that PSCI may have imposed on us.
	 * Must be done after all the sys_reg reset.
	 */
	if (vcpu->arch.reset_state.reset) {
		unsigned long target_pc = vcpu->arch.reset_state.pc;

		/* Gracefully handle Thumb2 entry point */
		if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
			target_pc &= ~((phys_addr_t) 1);
			vcpu_set_thumb(vcpu);
		}

		/* Propagate caller endianness */
		if (vcpu->arch.reset_state.be)
			kvm_vcpu_set_be(vcpu);

		*vcpu_pc(vcpu) = target_pc;
		vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);

		vcpu->arch.reset_state.reset = false;
	}

	/* Default workaround setup is enabled (if supported) */
	if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;

	/* Reset timer */
	ret = kvm_timer_vcpu_reset(vcpu);
out:
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	preempt_enable();
	return ret;
}

u32 get_kvm_ipa_limit(void)
{
	return kvm_ipa_limit;
}

int kvm_set_ipa_limit(void)
{
	unsigned int ipa_max, pa_max, va_max, parange, tgran_2;
	u64 mmfr0;

	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	parange = cpuid_feature_extract_unsigned_field(mmfr0,
				ID_AA64MMFR0_PARANGE_SHIFT);

	/*
	 * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at
	 * Stage-2. If not, things will stop very quickly.
	 */
	switch (PAGE_SIZE) {
	default:
	case SZ_4K:
		tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT;
		break;
	case SZ_16K:
		tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT;
		break;
	case SZ_64K:
		tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT;
		break;
	}

	switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
	default:
	case 1:
		kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
		return -EINVAL;
	case 0:
		kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n");
		break;
	case 2:
		kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n");
		break;
	}

	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);

	/* Clamp the IPA limit to the PA size supported by the kernel */
	ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;

	/*
	 * Since our stage2 table is dependent on the stage1 page table code,
	 * we must always honor the following condition:
	 *
	 *  Number of levels in Stage1 >= Number of levels in Stage2.
	 *
	 * So clamp the ipa limit further down to limit the number of levels.
	 * Since we can concatenate up to 16 tables at entry level, we could
	 * go up to 4 bits above the maximum VA addressable with the current
	 * number of levels.
	 */
	va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
	va_max += 4;

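	/*
	 * Worked example (assuming 4K pages with a 48-bit, 4-level stage1):
	 * PGDIR_SHIFT = 39, so va_max = 39 + 12 - 3 = 48, and the 4 bits of
	 * entry-level concatenation lift it to 52.
	 */
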
	if (va_max < ipa_max)
		ipa_max = va_max;

	/*
	 * If the final limit is lower than the real physical address
	 * limit of the CPUs, report the reason.
	 */
	if (ipa_max < pa_max)
		pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
			(va_max < pa_max) ? "Virtual" : "Physical");

	WARN(ipa_max < KVM_PHYS_SHIFT,
	     "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
	kvm_ipa_limit = ipa_max;
	kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);

	return 0;
}

/*
 * Configure the VTCR_EL2 for this VM. The VTCR value is common
 * across all the physical CPUs on the system. We use system wide
 * sanitised values to fill in different fields, except for Hardware
 * Management of Access Flags. HA Flag is set unconditionally on
 * all CPUs, as it is safe to run with or without the feature and
 * the bit is RES0 on CPUs that don't support it.
 */
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
	u64 vtcr = VTCR_EL2_FLAGS, mmfr0;
	u32 parange, phys_shift;
	u8 lvls;

	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
		return -EINVAL;

	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
	if (phys_shift) {
		if (phys_shift > kvm_ipa_limit ||
		    phys_shift < 32)
			return -EINVAL;
	} else {
		phys_shift = KVM_PHYS_SHIFT;
	}

	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
	parange = cpuid_feature_extract_unsigned_field(mmfr0,
				ID_AA64MMFR0_PARANGE_SHIFT);
	if (parange > ID_AA64MMFR0_PARANGE_MAX)
		parange = ID_AA64MMFR0_PARANGE_MAX;
	vtcr |= parange << VTCR_EL2_PS_SHIFT;

	vtcr |= VTCR_EL2_T0SZ(phys_shift);
	/*
	 * Use a minimum 2 level page table to prevent splitting
	 * host PMD huge pages at stage2.
	 */
	lvls = stage2_pgtable_levels(phys_shift);
	if (lvls < 2)
		lvls = 2;
	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);

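	/*
	 * Worked example (assuming 4K pages): the default 40-bit IPA gives
	 * T0SZ = 64 - 40 = 24 and stage2_pgtable_levels(40) = 3, i.e. a
	 * 3-level stage2 table starting at level 1.
	 */
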
	/*
	 * Enable the Hardware Access Flag management, unconditionally
	 * on all CPUs. The feature is RES0 on CPUs without the support
	 * and must be ignored by the CPUs.
	 */
	vtcr |= VTCR_EL2_HA;

	/* Set the vmid bits */
	vtcr |= (kvm_get_vmid_bits() == 16) ?
		VTCR_EL2_VS_16BIT :
		VTCR_EL2_VS_8BIT;
	kvm->arch.vtcr = vtcr;
	return 0;
}

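/*
 * Illustrative userspace usage (a sketch, not part of this file): the IPA
 * size is chosen as the VM "type" at creation time, e.g.
 *
 *	vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(40));
 *
 * A type of 0 keeps the default KVM_PHYS_SHIFT (40 bits); anything above
 * kvm_ipa_limit or below 32 is rejected above with -EINVAL.
 */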