Merge branch 'pm-docs'
[linux-2.6-microblaze.git] / arch / arm64 / kvm / psci.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012 - ARM Ltd
4  * Author: Marc Zyngier <marc.zyngier@arm.com>
5  */
6
7 #include <linux/arm-smccc.h>
8 #include <linux/preempt.h>
9 #include <linux/kvm_host.h>
10 #include <linux/uaccess.h>
11 #include <linux/wait.h>
12
13 #include <asm/cputype.h>
14 #include <asm/kvm_emulate.h>
15
16 #include <kvm/arm_psci.h>
17 #include <kvm/arm_hypercalls.h>
18
19 /*
20  * This is an implementation of the Power State Coordination Interface
21  * as described in ARM document number ARM DEN 0022A.
22  */
23
/* All-ones mask with the bits below affinity @level cleared. */
#define AFFINITY_MASK(level)	(~0UL << ((level) * MPIDR_LEVEL_BITS))
25
26 static unsigned long psci_affinity_mask(unsigned long affinity_level)
27 {
28         if (affinity_level <= 3)
29                 return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level);
30
31         return 0;
32 }
33
34 static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu)
35 {
36         /*
37          * NOTE: For simplicity, we make VCPU suspend emulation to be
38          * same-as WFI (Wait-for-interrupt) emulation.
39          *
40          * This means for KVM the wakeup events are interrupts and
41          * this is consistent with intended use of StateID as described
42          * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A).
43          *
44          * Further, we also treat power-down request to be same as
45          * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2
46          * specification (ARM DEN 0022A). This means all suspend states
47          * for KVM will preserve the register state.
48          */
49         kvm_vcpu_wfi(vcpu);
50
51         return PSCI_RET_SUCCESS;
52 }
53
54 static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu)
55 {
56         vcpu->arch.power_off = true;
57         kvm_make_request(KVM_REQ_SLEEP, vcpu);
58         kvm_vcpu_kick(vcpu);
59 }
60
61 static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu,
62                                            unsigned long affinity)
63 {
64         return !(affinity & ~MPIDR_HWID_BITMASK);
65 }
66
67 static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
68 {
69         struct vcpu_reset_state *reset_state;
70         struct kvm *kvm = source_vcpu->kvm;
71         struct kvm_vcpu *vcpu = NULL;
72         unsigned long cpu_id;
73
74         cpu_id = smccc_get_arg1(source_vcpu);
75         if (!kvm_psci_valid_affinity(source_vcpu, cpu_id))
76                 return PSCI_RET_INVALID_PARAMS;
77
78         vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id);
79
80         /*
81          * Make sure the caller requested a valid CPU and that the CPU is
82          * turned off.
83          */
84         if (!vcpu)
85                 return PSCI_RET_INVALID_PARAMS;
86         if (!vcpu->arch.power_off) {
87                 if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1)
88                         return PSCI_RET_ALREADY_ON;
89                 else
90                         return PSCI_RET_INVALID_PARAMS;
91         }
92
93         reset_state = &vcpu->arch.reset_state;
94
95         reset_state->pc = smccc_get_arg2(source_vcpu);
96
97         /* Propagate caller endianness */
98         reset_state->be = kvm_vcpu_is_be(source_vcpu);
99
100         /*
101          * NOTE: We always update r0 (or x0) because for PSCI v0.1
102          * the general purpose registers are undefined upon CPU_ON.
103          */
104         reset_state->r0 = smccc_get_arg3(source_vcpu);
105
106         WRITE_ONCE(reset_state->reset, true);
107         kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
108
109         /*
110          * Make sure the reset request is observed if the change to
111          * power_off is observed.
112          */
113         smp_wmb();
114
115         vcpu->arch.power_off = false;
116         kvm_vcpu_wake_up(vcpu);
117
118         return PSCI_RET_SUCCESS;
119 }
120
121 static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
122 {
123         int matching_cpus = 0;
124         unsigned long i, mpidr;
125         unsigned long target_affinity;
126         unsigned long target_affinity_mask;
127         unsigned long lowest_affinity_level;
128         struct kvm *kvm = vcpu->kvm;
129         struct kvm_vcpu *tmp;
130
131         target_affinity = smccc_get_arg1(vcpu);
132         lowest_affinity_level = smccc_get_arg2(vcpu);
133
134         if (!kvm_psci_valid_affinity(vcpu, target_affinity))
135                 return PSCI_RET_INVALID_PARAMS;
136
137         /* Determine target affinity mask */
138         target_affinity_mask = psci_affinity_mask(lowest_affinity_level);
139         if (!target_affinity_mask)
140                 return PSCI_RET_INVALID_PARAMS;
141
142         /* Ignore other bits of target affinity */
143         target_affinity &= target_affinity_mask;
144
145         /*
146          * If one or more VCPU matching target affinity are running
147          * then ON else OFF
148          */
149         kvm_for_each_vcpu(i, tmp, kvm) {
150                 mpidr = kvm_vcpu_get_mpidr_aff(tmp);
151                 if ((mpidr & target_affinity_mask) == target_affinity) {
152                         matching_cpus++;
153                         if (!tmp->arch.power_off)
154                                 return PSCI_0_2_AFFINITY_LEVEL_ON;
155                 }
156         }
157
158         if (!matching_cpus)
159                 return PSCI_RET_INVALID_PARAMS;
160
161         return PSCI_0_2_AFFINITY_LEVEL_OFF;
162 }
163
164 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
165 {
166         unsigned long i;
167         struct kvm_vcpu *tmp;
168
169         /*
170          * The KVM ABI specifies that a system event exit may call KVM_RUN
171          * again and may perform shutdown/reboot at a later time that when the
172          * actual request is made.  Since we are implementing PSCI and a
173          * caller of PSCI reboot and shutdown expects that the system shuts
174          * down or reboots immediately, let's make sure that VCPUs are not run
175          * after this call is handled and before the VCPUs have been
176          * re-initialized.
177          */
178         kvm_for_each_vcpu(i, tmp, vcpu->kvm)
179                 tmp->arch.power_off = true;
180         kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
181
182         memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
183         vcpu->run->system_event.type = type;
184         vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
185 }
186
187 static void kvm_psci_system_off(struct kvm_vcpu *vcpu)
188 {
189         kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN);
190 }
191
192 static void kvm_psci_system_reset(struct kvm_vcpu *vcpu)
193 {
194         kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET);
195 }
196
197 static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu)
198 {
199         int i;
200
201         /*
202          * Zero the input registers' upper 32 bits. They will be fully
203          * zeroed on exit, so we're fine changing them in place.
204          */
205         for (i = 1; i < 4; i++)
206                 vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i)));
207 }
208
209 static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn)
210 {
211         switch(fn) {
212         case PSCI_0_2_FN64_CPU_SUSPEND:
213         case PSCI_0_2_FN64_CPU_ON:
214         case PSCI_0_2_FN64_AFFINITY_INFO:
215                 /* Disallow these functions for 32bit guests */
216                 if (vcpu_mode_is_32bit(vcpu))
217                         return PSCI_RET_NOT_SUPPORTED;
218                 break;
219         }
220
221         return 0;
222 }
223
224 static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
225 {
226         struct kvm *kvm = vcpu->kvm;
227         u32 psci_fn = smccc_get_function(vcpu);
228         unsigned long val;
229         int ret = 1;
230
231         val = kvm_psci_check_allowed_function(vcpu, psci_fn);
232         if (val)
233                 goto out;
234
235         switch (psci_fn) {
236         case PSCI_0_2_FN_PSCI_VERSION:
237                 /*
238                  * Bits[31:16] = Major Version = 0
239                  * Bits[15:0] = Minor Version = 2
240                  */
241                 val = KVM_ARM_PSCI_0_2;
242                 break;
243         case PSCI_0_2_FN_CPU_SUSPEND:
244         case PSCI_0_2_FN64_CPU_SUSPEND:
245                 val = kvm_psci_vcpu_suspend(vcpu);
246                 break;
247         case PSCI_0_2_FN_CPU_OFF:
248                 kvm_psci_vcpu_off(vcpu);
249                 val = PSCI_RET_SUCCESS;
250                 break;
251         case PSCI_0_2_FN_CPU_ON:
252                 kvm_psci_narrow_to_32bit(vcpu);
253                 fallthrough;
254         case PSCI_0_2_FN64_CPU_ON:
255                 mutex_lock(&kvm->lock);
256                 val = kvm_psci_vcpu_on(vcpu);
257                 mutex_unlock(&kvm->lock);
258                 break;
259         case PSCI_0_2_FN_AFFINITY_INFO:
260                 kvm_psci_narrow_to_32bit(vcpu);
261                 fallthrough;
262         case PSCI_0_2_FN64_AFFINITY_INFO:
263                 val = kvm_psci_vcpu_affinity_info(vcpu);
264                 break;
265         case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
266                 /*
267                  * Trusted OS is MP hence does not require migration
268                  * or
269                  * Trusted OS is not present
270                  */
271                 val = PSCI_0_2_TOS_MP;
272                 break;
273         case PSCI_0_2_FN_SYSTEM_OFF:
274                 kvm_psci_system_off(vcpu);
275                 /*
276                  * We shouldn't be going back to guest VCPU after
277                  * receiving SYSTEM_OFF request.
278                  *
279                  * If user space accidentally/deliberately resumes
280                  * guest VCPU after SYSTEM_OFF request then guest
281                  * VCPU should see internal failure from PSCI return
282                  * value. To achieve this, we preload r0 (or x0) with
283                  * PSCI return value INTERNAL_FAILURE.
284                  */
285                 val = PSCI_RET_INTERNAL_FAILURE;
286                 ret = 0;
287                 break;
288         case PSCI_0_2_FN_SYSTEM_RESET:
289                 kvm_psci_system_reset(vcpu);
290                 /*
291                  * Same reason as SYSTEM_OFF for preloading r0 (or x0)
292                  * with PSCI return value INTERNAL_FAILURE.
293                  */
294                 val = PSCI_RET_INTERNAL_FAILURE;
295                 ret = 0;
296                 break;
297         default:
298                 val = PSCI_RET_NOT_SUPPORTED;
299                 break;
300         }
301
302 out:
303         smccc_set_retval(vcpu, val, 0, 0, 0);
304         return ret;
305 }
306
307 static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu)
308 {
309         u32 psci_fn = smccc_get_function(vcpu);
310         u32 feature;
311         unsigned long val;
312         int ret = 1;
313
314         switch(psci_fn) {
315         case PSCI_0_2_FN_PSCI_VERSION:
316                 val = KVM_ARM_PSCI_1_0;
317                 break;
318         case PSCI_1_0_FN_PSCI_FEATURES:
319                 feature = smccc_get_arg1(vcpu);
320                 val = kvm_psci_check_allowed_function(vcpu, feature);
321                 if (val)
322                         break;
323
324                 switch(feature) {
325                 case PSCI_0_2_FN_PSCI_VERSION:
326                 case PSCI_0_2_FN_CPU_SUSPEND:
327                 case PSCI_0_2_FN64_CPU_SUSPEND:
328                 case PSCI_0_2_FN_CPU_OFF:
329                 case PSCI_0_2_FN_CPU_ON:
330                 case PSCI_0_2_FN64_CPU_ON:
331                 case PSCI_0_2_FN_AFFINITY_INFO:
332                 case PSCI_0_2_FN64_AFFINITY_INFO:
333                 case PSCI_0_2_FN_MIGRATE_INFO_TYPE:
334                 case PSCI_0_2_FN_SYSTEM_OFF:
335                 case PSCI_0_2_FN_SYSTEM_RESET:
336                 case PSCI_1_0_FN_PSCI_FEATURES:
337                 case ARM_SMCCC_VERSION_FUNC_ID:
338                         val = 0;
339                         break;
340                 default:
341                         val = PSCI_RET_NOT_SUPPORTED;
342                         break;
343                 }
344                 break;
345         default:
346                 return kvm_psci_0_2_call(vcpu);
347         }
348
349         smccc_set_retval(vcpu, val, 0, 0, 0);
350         return ret;
351 }
352
353 static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
354 {
355         struct kvm *kvm = vcpu->kvm;
356         u32 psci_fn = smccc_get_function(vcpu);
357         unsigned long val;
358
359         switch (psci_fn) {
360         case KVM_PSCI_FN_CPU_OFF:
361                 kvm_psci_vcpu_off(vcpu);
362                 val = PSCI_RET_SUCCESS;
363                 break;
364         case KVM_PSCI_FN_CPU_ON:
365                 mutex_lock(&kvm->lock);
366                 val = kvm_psci_vcpu_on(vcpu);
367                 mutex_unlock(&kvm->lock);
368                 break;
369         default:
370                 val = PSCI_RET_NOT_SUPPORTED;
371                 break;
372         }
373
374         smccc_set_retval(vcpu, val, 0, 0, 0);
375         return 1;
376 }
377
378 /**
379  * kvm_psci_call - handle PSCI call if r0 value is in range
380  * @vcpu: Pointer to the VCPU struct
381  *
382  * Handle PSCI calls from guests through traps from HVC instructions.
383  * The calling convention is similar to SMC calls to the secure world
384  * where the function number is placed in r0.
385  *
386  * This function returns: > 0 (success), 0 (success but exit to user
387  * space), and < 0 (errors)
388  *
389  * Errors:
390  * -EINVAL: Unrecognized PSCI function
391  */
392 int kvm_psci_call(struct kvm_vcpu *vcpu)
393 {
394         switch (kvm_psci_version(vcpu, vcpu->kvm)) {
395         case KVM_ARM_PSCI_1_0:
396                 return kvm_psci_1_0_call(vcpu);
397         case KVM_ARM_PSCI_0_2:
398                 return kvm_psci_0_2_call(vcpu);
399         case KVM_ARM_PSCI_0_1:
400                 return kvm_psci_0_1_call(vcpu);
401         default:
402                 return -EINVAL;
403         };
404 }
405
406 int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
407 {
408         return 4;               /* PSCI version and three workaround registers */
409 }
410
411 int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
412 {
413         if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
414                 return -EFAULT;
415
416         if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
417                 return -EFAULT;
418
419         if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
420                 return -EFAULT;
421
422         if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++))
423                 return -EFAULT;
424
425         return 0;
426 }
427
428 #define KVM_REG_FEATURE_LEVEL_WIDTH     4
429 #define KVM_REG_FEATURE_LEVEL_MASK      (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
430
431 /*
432  * Convert the workaround level into an easy-to-compare number, where higher
433  * values mean better protection.
434  */
435 static int get_kernel_wa_level(u64 regid)
436 {
437         switch (regid) {
438         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
439                 switch (arm64_get_spectre_v2_state()) {
440                 case SPECTRE_VULNERABLE:
441                         return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
442                 case SPECTRE_MITIGATED:
443                         return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
444                 case SPECTRE_UNAFFECTED:
445                         return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
446                 }
447                 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
448         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
449                 switch (arm64_get_spectre_v4_state()) {
450                 case SPECTRE_MITIGATED:
451                         /*
452                          * As for the hypercall discovery, we pretend we
453                          * don't have any FW mitigation if SSBS is there at
454                          * all times.
455                          */
456                         if (cpus_have_final_cap(ARM64_SSBS))
457                                 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
458                         fallthrough;
459                 case SPECTRE_UNAFFECTED:
460                         return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
461                 case SPECTRE_VULNERABLE:
462                         return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
463                 }
464                 break;
465         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
466                 switch (arm64_get_spectre_bhb_state()) {
467                 case SPECTRE_VULNERABLE:
468                         return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
469                 case SPECTRE_MITIGATED:
470                         return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL;
471                 case SPECTRE_UNAFFECTED:
472                         return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED;
473                 }
474                 return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL;
475         }
476
477         return -EINVAL;
478 }
479
480 int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
481 {
482         void __user *uaddr = (void __user *)(long)reg->addr;
483         u64 val;
484
485         switch (reg->id) {
486         case KVM_REG_ARM_PSCI_VERSION:
487                 val = kvm_psci_version(vcpu, vcpu->kvm);
488                 break;
489         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
490         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
491         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
492                 val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
493                 break;
494         default:
495                 return -ENOENT;
496         }
497
498         if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
499                 return -EFAULT;
500
501         return 0;
502 }
503
504 int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
505 {
506         void __user *uaddr = (void __user *)(long)reg->addr;
507         u64 val;
508         int wa_level;
509
510         if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
511                 return -EFAULT;
512
513         switch (reg->id) {
514         case KVM_REG_ARM_PSCI_VERSION:
515         {
516                 bool wants_02;
517
518                 wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
519
520                 switch (val) {
521                 case KVM_ARM_PSCI_0_1:
522                         if (wants_02)
523                                 return -EINVAL;
524                         vcpu->kvm->arch.psci_version = val;
525                         return 0;
526                 case KVM_ARM_PSCI_0_2:
527                 case KVM_ARM_PSCI_1_0:
528                         if (!wants_02)
529                                 return -EINVAL;
530                         vcpu->kvm->arch.psci_version = val;
531                         return 0;
532                 }
533                 break;
534         }
535
536         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
537         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3:
538                 if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
539                         return -EINVAL;
540
541                 if (get_kernel_wa_level(reg->id) < val)
542                         return -EINVAL;
543
544                 return 0;
545
546         case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
547                 if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
548                             KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
549                         return -EINVAL;
550
551                 /* The enabled bit must not be set unless the level is AVAIL. */
552                 if ((val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED) &&
553                     (val & KVM_REG_FEATURE_LEVEL_MASK) != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL)
554                         return -EINVAL;
555
556                 /*
557                  * Map all the possible incoming states to the only two we
558                  * really want to deal with.
559                  */
560                 switch (val & KVM_REG_FEATURE_LEVEL_MASK) {
561                 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL:
562                 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN:
563                         wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
564                         break;
565                 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
566                 case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
567                         wa_level = KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
568                         break;
569                 default:
570                         return -EINVAL;
571                 }
572
573                 /*
574                  * We can deal with NOT_AVAIL on NOT_REQUIRED, but not the
575                  * other way around.
576                  */
577                 if (get_kernel_wa_level(reg->id) < wa_level)
578                         return -EINVAL;
579
580                 return 0;
581         default:
582                 return -ENOENT;
583         }
584
585         return -EINVAL;
586 }