KVM: s390: Add memcg accounting to KVM allocations
[linux-2.6-microblaze.git] / arch / s390 / kvm / intercept.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * in-kernel handling for sie intercepts
4  *
5  * Copyright IBM Corp. 2008, 2020
6  *
7  *    Author(s): Carsten Otte <cotte@de.ibm.com>
8  *               Christian Borntraeger <borntraeger@de.ibm.com>
9  */
10
11 #include <linux/kvm_host.h>
12 #include <linux/errno.h>
13 #include <linux/pagemap.h>
14
15 #include <asm/asm-offsets.h>
16 #include <asm/irq.h>
17 #include <asm/sysinfo.h>
18 #include <asm/uv.h>
19
20 #include "kvm-s390.h"
21 #include "gaccess.h"
22 #include "trace.h"
23 #include "trace-s390.h"
24
25 u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
26 {
27         struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
28         u8 ilen = 0;
29
30         switch (vcpu->arch.sie_block->icptcode) {
31         case ICPT_INST:
32         case ICPT_INSTPROGI:
33         case ICPT_OPEREXC:
34         case ICPT_PARTEXEC:
35         case ICPT_IOINST:
36                 /* instruction only stored for these icptcodes */
37                 ilen = insn_length(vcpu->arch.sie_block->ipa >> 8);
38                 /* Use the length of the EXECUTE instruction if necessary */
39                 if (sie_block->icptstatus & 1) {
40                         ilen = (sie_block->icptstatus >> 4) & 0x6;
41                         if (!ilen)
42                                 ilen = 4;
43                 }
44                 break;
45         case ICPT_PROGI:
46                 /* bit 1+2 of pgmilc are the ilc, so we directly get ilen */
47                 ilen = vcpu->arch.sie_block->pgmilc & 0x6;
48                 break;
49         }
50         return ilen;
51 }
52
53 static int handle_stop(struct kvm_vcpu *vcpu)
54 {
55         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
56         int rc = 0;
57         uint8_t flags, stop_pending;
58
59         vcpu->stat.exit_stop_request++;
60
61         /* delay the stop if any non-stop irq is pending */
62         if (kvm_s390_vcpu_has_irq(vcpu, 1))
63                 return 0;
64
65         /* avoid races with the injection/SIGP STOP code */
66         spin_lock(&li->lock);
67         flags = li->irq.stop.flags;
68         stop_pending = kvm_s390_is_stop_irq_pending(vcpu);
69         spin_unlock(&li->lock);
70
71         trace_kvm_s390_stop_request(stop_pending, flags);
72         if (!stop_pending)
73                 return 0;
74
75         if (flags & KVM_S390_STOP_FLAG_STORE_STATUS) {
76                 rc = kvm_s390_vcpu_store_status(vcpu,
77                                                 KVM_S390_STORE_STATUS_NOADDR);
78                 if (rc)
79                         return rc;
80         }
81
82         /*
83          * no need to check the return value of vcpu_stop as it can only have
84          * an error for protvirt, but protvirt means user cpu state
85          */
86         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
87                 kvm_s390_vcpu_stop(vcpu);
88         return -EOPNOTSUPP;
89 }
90
91 static int handle_validity(struct kvm_vcpu *vcpu)
92 {
93         int viwhy = vcpu->arch.sie_block->ipb >> 16;
94
95         vcpu->stat.exit_validity++;
96         trace_kvm_s390_intercept_validity(vcpu, viwhy);
97         KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
98                   current->pid, vcpu->kvm);
99
100         /* do not warn on invalid runtime instrumentation mode */
101         WARN_ONCE(viwhy != 0x44, "kvm: unhandled validity intercept 0x%x\n",
102                   viwhy);
103         return -EINVAL;
104 }
105
106 static int handle_instruction(struct kvm_vcpu *vcpu)
107 {
108         vcpu->stat.exit_instruction++;
109         trace_kvm_s390_intercept_instruction(vcpu,
110                                              vcpu->arch.sie_block->ipa,
111                                              vcpu->arch.sie_block->ipb);
112
113         switch (vcpu->arch.sie_block->ipa >> 8) {
114         case 0x01:
115                 return kvm_s390_handle_01(vcpu);
116         case 0x82:
117                 return kvm_s390_handle_lpsw(vcpu);
118         case 0x83:
119                 return kvm_s390_handle_diag(vcpu);
120         case 0xaa:
121                 return kvm_s390_handle_aa(vcpu);
122         case 0xae:
123                 return kvm_s390_handle_sigp(vcpu);
124         case 0xb2:
125                 return kvm_s390_handle_b2(vcpu);
126         case 0xb6:
127                 return kvm_s390_handle_stctl(vcpu);
128         case 0xb7:
129                 return kvm_s390_handle_lctl(vcpu);
130         case 0xb9:
131                 return kvm_s390_handle_b9(vcpu);
132         case 0xe3:
133                 return kvm_s390_handle_e3(vcpu);
134         case 0xe5:
135                 return kvm_s390_handle_e5(vcpu);
136         case 0xeb:
137                 return kvm_s390_handle_eb(vcpu);
138         default:
139                 return -EOPNOTSUPP;
140         }
141 }
142
143 static int inject_prog_on_prog_intercept(struct kvm_vcpu *vcpu)
144 {
145         struct kvm_s390_pgm_info pgm_info = {
146                 .code = vcpu->arch.sie_block->iprcc,
147                 /* the PSW has already been rewound */
148                 .flags = KVM_S390_PGM_FLAGS_NO_REWIND,
149         };
150
151         switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
152         case PGM_AFX_TRANSLATION:
153         case PGM_ASX_TRANSLATION:
154         case PGM_EX_TRANSLATION:
155         case PGM_LFX_TRANSLATION:
156         case PGM_LSTE_SEQUENCE:
157         case PGM_LSX_TRANSLATION:
158         case PGM_LX_TRANSLATION:
159         case PGM_PRIMARY_AUTHORITY:
160         case PGM_SECONDARY_AUTHORITY:
161         case PGM_SPACE_SWITCH:
162                 pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
163                 break;
164         case PGM_ALEN_TRANSLATION:
165         case PGM_ALE_SEQUENCE:
166         case PGM_ASTE_INSTANCE:
167         case PGM_ASTE_SEQUENCE:
168         case PGM_ASTE_VALIDITY:
169         case PGM_EXTENDED_AUTHORITY:
170                 pgm_info.exc_access_id = vcpu->arch.sie_block->eai;
171                 break;
172         case PGM_ASCE_TYPE:
173         case PGM_PAGE_TRANSLATION:
174         case PGM_REGION_FIRST_TRANS:
175         case PGM_REGION_SECOND_TRANS:
176         case PGM_REGION_THIRD_TRANS:
177         case PGM_SEGMENT_TRANSLATION:
178                 pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
179                 pgm_info.exc_access_id  = vcpu->arch.sie_block->eai;
180                 pgm_info.op_access_id  = vcpu->arch.sie_block->oai;
181                 break;
182         case PGM_MONITOR:
183                 pgm_info.mon_class_nr = vcpu->arch.sie_block->mcn;
184                 pgm_info.mon_code = vcpu->arch.sie_block->tecmc;
185                 break;
186         case PGM_VECTOR_PROCESSING:
187         case PGM_DATA:
188                 pgm_info.data_exc_code = vcpu->arch.sie_block->dxc;
189                 break;
190         case PGM_PROTECTION:
191                 pgm_info.trans_exc_code = vcpu->arch.sie_block->tecmc;
192                 pgm_info.exc_access_id  = vcpu->arch.sie_block->eai;
193                 break;
194         default:
195                 break;
196         }
197
198         if (vcpu->arch.sie_block->iprcc & PGM_PER) {
199                 pgm_info.per_code = vcpu->arch.sie_block->perc;
200                 pgm_info.per_atmid = vcpu->arch.sie_block->peratmid;
201                 pgm_info.per_address = vcpu->arch.sie_block->peraddr;
202                 pgm_info.per_access_id = vcpu->arch.sie_block->peraid;
203         }
204         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
205 }
206
207 /*
208  * restore ITDB to program-interruption TDB in guest lowcore
209  * and set TX abort indication if required
210 */
211 static int handle_itdb(struct kvm_vcpu *vcpu)
212 {
213         struct kvm_s390_itdb *itdb;
214         int rc;
215
216         if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu))
217                 return 0;
218         if (current->thread.per_flags & PER_FLAG_NO_TE)
219                 return 0;
220         itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
221         rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
222         if (rc)
223                 return rc;
224         memset(itdb, 0, sizeof(*itdb));
225
226         return 0;
227 }
228
/*
 * Non-zero if the program interruption code in the vcpu's SIE block has
 * the PER bit set. The argument is parenthesized so that any pointer
 * expression (e.g. "base + 1") can be passed safely.
 */
#define per_event(vcpu) ((vcpu)->arch.sie_block->iprcc & PGM_PER)
230
231 static int handle_prog(struct kvm_vcpu *vcpu)
232 {
233         psw_t psw;
234         int rc;
235
236         vcpu->stat.exit_program_interruption++;
237
238         /*
239          * Intercept 8 indicates a loop of specification exceptions
240          * for protected guests.
241          */
242         if (kvm_s390_pv_cpu_is_protected(vcpu))
243                 return -EOPNOTSUPP;
244
245         if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
246                 rc = kvm_s390_handle_per_event(vcpu);
247                 if (rc)
248                         return rc;
249                 /* the interrupt might have been filtered out completely */
250                 if (vcpu->arch.sie_block->iprcc == 0)
251                         return 0;
252         }
253
254         trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
255         if (vcpu->arch.sie_block->iprcc == PGM_SPECIFICATION) {
256                 rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &psw, sizeof(psw_t));
257                 if (rc)
258                         return rc;
259                 /* Avoid endless loops of specification exceptions */
260                 if (!is_valid_psw(&psw))
261                         return -EOPNOTSUPP;
262         }
263         rc = handle_itdb(vcpu);
264         if (rc)
265                 return rc;
266
267         return inject_prog_on_prog_intercept(vcpu);
268 }
269
270 /**
271  * handle_external_interrupt - used for external interruption interceptions
272  *
273  * This interception only occurs if the CPUSTAT_EXT_INT bit was set, or if
274  * the new PSW does not have external interrupts disabled. In the first case,
275  * we've got to deliver the interrupt manually, and in the second case, we
276  * drop to userspace to handle the situation there.
277  */
278 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
279 {
280         u16 eic = vcpu->arch.sie_block->eic;
281         struct kvm_s390_irq irq;
282         psw_t newpsw;
283         int rc;
284
285         vcpu->stat.exit_external_interrupt++;
286
287         rc = read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &newpsw, sizeof(psw_t));
288         if (rc)
289                 return rc;
290         /* We can not handle clock comparator or timer interrupt with bad PSW */
291         if ((eic == EXT_IRQ_CLK_COMP || eic == EXT_IRQ_CPU_TIMER) &&
292             (newpsw.mask & PSW_MASK_EXT))
293                 return -EOPNOTSUPP;
294
295         switch (eic) {
296         case EXT_IRQ_CLK_COMP:
297                 irq.type = KVM_S390_INT_CLOCK_COMP;
298                 break;
299         case EXT_IRQ_CPU_TIMER:
300                 irq.type = KVM_S390_INT_CPU_TIMER;
301                 break;
302         case EXT_IRQ_EXTERNAL_CALL:
303                 irq.type = KVM_S390_INT_EXTERNAL_CALL;
304                 irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
305                 rc = kvm_s390_inject_vcpu(vcpu, &irq);
306                 /* ignore if another external call is already pending */
307                 if (rc == -EBUSY)
308                         return 0;
309                 return rc;
310         default:
311                 return -EOPNOTSUPP;
312         }
313
314         return kvm_s390_inject_vcpu(vcpu, &irq);
315 }
316
317 /**
318  * Handle MOVE PAGE partial execution interception.
319  *
320  * This interception can only happen for guests with DAT disabled and
321  * addresses that are currently not mapped in the host. Thus we try to
322  * set up the mappings for the corresponding user pages here (or throw
323  * addressing exceptions in case of illegal guest addresses).
324  */
325 static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
326 {
327         unsigned long srcaddr, dstaddr;
328         int reg1, reg2, rc;
329
330         kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
331
332         /* Make sure that the source is paged-in */
333         rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
334                                      reg2, &srcaddr, GACC_FETCH);
335         if (rc)
336                 return kvm_s390_inject_prog_cond(vcpu, rc);
337         rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
338         if (rc != 0)
339                 return rc;
340
341         /* Make sure that the destination is paged-in */
342         rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
343                                      reg1, &dstaddr, GACC_STORE);
344         if (rc)
345                 return kvm_s390_inject_prog_cond(vcpu, rc);
346         rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
347         if (rc != 0)
348                 return rc;
349
350         kvm_s390_retry_instr(vcpu);
351
352         return 0;
353 }
354
355 static int handle_partial_execution(struct kvm_vcpu *vcpu)
356 {
357         vcpu->stat.exit_pei++;
358
359         if (vcpu->arch.sie_block->ipa == 0xb254)        /* MVPG */
360                 return handle_mvpg_pei(vcpu);
361         if (vcpu->arch.sie_block->ipa >> 8 == 0xae)     /* SIGP */
362                 return kvm_s390_handle_sigp_pei(vcpu);
363
364         return -EOPNOTSUPP;
365 }
366
367 /*
368  * Handle the sthyi instruction that provides the guest with system
369  * information, like current CPU resources available at each level of
370  * the machine.
371  */
372 int handle_sthyi(struct kvm_vcpu *vcpu)
373 {
374         int reg1, reg2, r = 0;
375         u64 code, addr, cc = 0, rc = 0;
376         struct sthyi_sctns *sctns = NULL;
377
378         if (!test_kvm_facility(vcpu->kvm, 74))
379                 return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
380
381         kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
382         code = vcpu->run->s.regs.gprs[reg1];
383         addr = vcpu->run->s.regs.gprs[reg2];
384
385         vcpu->stat.instruction_sthyi++;
386         VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
387         trace_kvm_s390_handle_sthyi(vcpu, code, addr);
388
389         if (reg1 == reg2 || reg1 & 1 || reg2 & 1)
390                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
391
392         if (code & 0xffff) {
393                 cc = 3;
394                 rc = 4;
395                 goto out;
396         }
397
398         if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
399                 return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
400
401         sctns = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
402         if (!sctns)
403                 return -ENOMEM;
404
405         cc = sthyi_fill(sctns, &rc);
406
407 out:
408         if (!cc) {
409                 if (kvm_s390_pv_cpu_is_protected(vcpu)) {
410                         memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
411                                sctns, PAGE_SIZE);
412                 } else {
413                         r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
414                         if (r) {
415                                 free_page((unsigned long)sctns);
416                                 return kvm_s390_inject_prog_cond(vcpu, r);
417                         }
418                 }
419         }
420
421         free_page((unsigned long)sctns);
422         vcpu->run->s.regs.gprs[reg2 + 1] = rc;
423         kvm_s390_set_psw_cc(vcpu, cc);
424         return r;
425 }
426
427 static int handle_operexc(struct kvm_vcpu *vcpu)
428 {
429         psw_t oldpsw, newpsw;
430         int rc;
431
432         vcpu->stat.exit_operation_exception++;
433         trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
434                                       vcpu->arch.sie_block->ipb);
435
436         if (vcpu->arch.sie_block->ipa == 0xb256)
437                 return handle_sthyi(vcpu);
438
439         if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
440                 return -EOPNOTSUPP;
441         rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t));
442         if (rc)
443                 return rc;
444         /*
445          * Avoid endless loops of operation exceptions, if the pgm new
446          * PSW will cause a new operation exception.
447          * The heuristic checks if the pgm new psw is within 6 bytes before
448          * the faulting psw address (with same DAT, AS settings) and the
449          * new psw is not a wait psw and the fault was not triggered by
450          * problem state.
451          */
452         oldpsw = vcpu->arch.sie_block->gpsw;
453         if (oldpsw.addr - newpsw.addr <= 6 &&
454             !(newpsw.mask & PSW_MASK_WAIT) &&
455             !(oldpsw.mask & PSW_MASK_PSTATE) &&
456             (newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) &&
457             (newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT))
458                 return -EOPNOTSUPP;
459
460         return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
461 }
462
463 static int handle_pv_spx(struct kvm_vcpu *vcpu)
464 {
465         u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
466
467         kvm_s390_set_prefix(vcpu, pref);
468         trace_kvm_s390_handle_prefix(vcpu, 1, pref);
469         return 0;
470 }
471
472 static int handle_pv_sclp(struct kvm_vcpu *vcpu)
473 {
474         struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
475
476         spin_lock(&fi->lock);
477         /*
478          * 2 cases:
479          * a: an sccb answering interrupt was already pending or in flight.
480          *    As the sccb value is not known we can simply set some value to
481          *    trigger delivery of a saved SCCB. UV will then use its saved
482          *    copy of the SCCB value.
483          * b: an error SCCB interrupt needs to be injected so we also inject
484          *    a fake SCCB address. Firmware will use the proper one.
485          * This makes sure, that both errors and real sccb returns will only
486          * be delivered after a notification intercept (instruction has
487          * finished) but not after others.
488          */
489         fi->srv_signal.ext_params |= 0x43000;
490         set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
491         clear_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
492         spin_unlock(&fi->lock);
493         return 0;
494 }
495
496 static int handle_pv_uvc(struct kvm_vcpu *vcpu)
497 {
498         struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
499         struct uv_cb_cts uvcb = {
500                 .header.cmd     = UVC_CMD_UNPIN_PAGE_SHARED,
501                 .header.len     = sizeof(uvcb),
502                 .guest_handle   = kvm_s390_pv_get_handle(vcpu->kvm),
503                 .gaddr          = guest_uvcb->paddr,
504         };
505         int rc;
506
507         if (guest_uvcb->header.cmd != UVC_CMD_REMOVE_SHARED_ACCESS) {
508                 WARN_ONCE(1, "Unexpected notification intercept for UVC 0x%x\n",
509                           guest_uvcb->header.cmd);
510                 return 0;
511         }
512         rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
513         /*
514          * If the unpin did not succeed, the guest will exit again for the UVC
515          * and we will retry the unpin.
516          */
517         if (rc == -EINVAL)
518                 return 0;
519         return rc;
520 }
521
522 static int handle_pv_notification(struct kvm_vcpu *vcpu)
523 {
524         if (vcpu->arch.sie_block->ipa == 0xb210)
525                 return handle_pv_spx(vcpu);
526         if (vcpu->arch.sie_block->ipa == 0xb220)
527                 return handle_pv_sclp(vcpu);
528         if (vcpu->arch.sie_block->ipa == 0xb9a4)
529                 return handle_pv_uvc(vcpu);
530
531         return handle_instruction(vcpu);
532 }
533
534 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
535 {
536         int rc, per_rc = 0;
537
538         if (kvm_is_ucontrol(vcpu->kvm))
539                 return -EOPNOTSUPP;
540
541         switch (vcpu->arch.sie_block->icptcode) {
542         case ICPT_EXTREQ:
543                 vcpu->stat.exit_external_request++;
544                 return 0;
545         case ICPT_IOREQ:
546                 vcpu->stat.exit_io_request++;
547                 return 0;
548         case ICPT_INST:
549                 rc = handle_instruction(vcpu);
550                 break;
551         case ICPT_PROGI:
552                 return handle_prog(vcpu);
553         case ICPT_EXTINT:
554                 return handle_external_interrupt(vcpu);
555         case ICPT_WAIT:
556                 return kvm_s390_handle_wait(vcpu);
557         case ICPT_VALIDITY:
558                 return handle_validity(vcpu);
559         case ICPT_STOP:
560                 return handle_stop(vcpu);
561         case ICPT_OPEREXC:
562                 rc = handle_operexc(vcpu);
563                 break;
564         case ICPT_PARTEXEC:
565                 rc = handle_partial_execution(vcpu);
566                 break;
567         case ICPT_KSS:
568                 rc = kvm_s390_skey_check_enable(vcpu);
569                 break;
570         case ICPT_MCHKREQ:
571         case ICPT_INT_ENABLE:
572                 /*
573                  * PSW bit 13 or a CR (0, 6, 14) changed and we might
574                  * now be able to deliver interrupts. The pre-run code
575                  * will take care of this.
576                  */
577                 rc = 0;
578                 break;
579         case ICPT_PV_INSTR:
580                 rc = handle_instruction(vcpu);
581                 break;
582         case ICPT_PV_NOTIFY:
583                 rc = handle_pv_notification(vcpu);
584                 break;
585         case ICPT_PV_PREF:
586                 rc = 0;
587                 gmap_convert_to_secure(vcpu->arch.gmap,
588                                        kvm_s390_get_prefix(vcpu));
589                 gmap_convert_to_secure(vcpu->arch.gmap,
590                                        kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
591                 break;
592         default:
593                 return -EOPNOTSUPP;
594         }
595
596         /* process PER, also if the instrution is processed in user space */
597         if (vcpu->arch.sie_block->icptstatus & 0x02 &&
598             (!rc || rc == -EOPNOTSUPP))
599                 per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
600         return per_rc ? per_rc : rc;
601 }