// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
        u64 val;

        if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
                offset |= offset << 4;

        val = in_be64(xd->eoi_mmio + offset);
        return (u8)val;
}

static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct xive_q *q = &xc->queues[prio];

        xive_native_disable_queue(xc->vp_id, q, prio);
        if (q->qpage) {
                put_page(virt_to_page(q->qpage));
                q->qpage = NULL;
        }
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        int i;

        if (!kvmppc_xive_enabled(vcpu))
                return;

        if (!xc)
                return;

        pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

        /* Ensure no interrupt is still routed to that VP */
        xc->valid = false;
        kvmppc_xive_disable_vcpu_interrupts(vcpu);

        /* Disable the VP */
        xive_native_disable_vp(xc->vp_id);

        /* Free the queues & associated interrupts */
        for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
                /* Free the escalation irq */
                if (xc->esc_virq[i]) {
                        free_irq(xc->esc_virq[i], vcpu);
                        irq_dispose_mapping(xc->esc_virq[i]);
                        kfree(xc->esc_virq_names[i]);
                        xc->esc_virq[i] = 0;
                }

                /* Free the queue */
                kvmppc_xive_native_cleanup_queue(vcpu, i);
        }

        /* Free the VP */
        kfree(xc);

        /* Cleanup the vcpu */
        vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
        vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
                                    struct kvm_vcpu *vcpu, u32 server_num)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_vcpu *xc = NULL;
        int rc;

        pr_devel("native_connect_vcpu(server=%d)\n", server_num);

        if (dev->ops != &kvm_xive_native_ops) {
                pr_devel("Wrong ops !\n");
                return -EPERM;
        }
        if (xive->kvm != vcpu->kvm)
                return -EPERM;
        if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
                return -EBUSY;
        if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
                pr_devel("Out of bounds !\n");
                return -EINVAL;
        }

        mutex_lock(&xive->lock);

        if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
                pr_devel("Duplicate !\n");
                rc = -EEXIST;
                goto bail;
        }

        xc = kzalloc(sizeof(*xc), GFP_KERNEL);
        if (!xc) {
                rc = -ENOMEM;
                goto bail;
        }

        vcpu->arch.xive_vcpu = xc;
        xc->xive = xive;
        xc->vcpu = vcpu;
        xc->server_num = server_num;

        xc->vp_id = kvmppc_xive_vp(xive, server_num);
        xc->valid = true;
        vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

        rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
        if (rc) {
                pr_err("Failed to get VP info from OPAL: %d\n", rc);
                goto bail;
        }

        /*
         * Enable the VP first, as the single escalation mode will
         * affect the numbering of the escalation interrupts
         */
        rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
        if (rc) {
                pr_err("Failed to enable VP in OPAL: %d\n", rc);
                goto bail;
        }

        /* Configure VCPU fields for use by assembly push/pull */
        vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
        vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

        /* TODO: reset all queues to a clean state ? */
bail:
        mutex_unlock(&xive->lock);
        if (rc)
                kvmppc_xive_native_cleanup_vcpu(vcpu);

        return rc;
}
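
/*
 * Illustrative sketch (not part of the kernel build): userspace reaches
 * kvmppc_xive_native_connect_vcpu() through the KVM_ENABLE_CAP vcpu ioctl
 * with KVM_CAP_PPC_IRQ_XIVE, passing the XIVE device fd and the server
 * number. The fd variable names are assumptions for the example.
 *
 *      struct kvm_enable_cap cap = {
 *              .cap = KVM_CAP_PPC_IRQ_XIVE,
 *              .args[0] = xive_device_fd,  // fd from KVM_CREATE_DEVICE
 *              .args[1] = server_num,      // XIVE VP / server number
 *      };
 *      if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
 *              err(1, "KVM_ENABLE_CAP(KVM_CAP_PPC_IRQ_XIVE)");
 */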

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
        struct kvmppc_xive *xive = kvm->arch.xive;
        pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

        if (irq >= KVMPPC_XIVE_NR_IRQS)
                return -EINVAL;

        /*
         * Clear the ESB pages of the IRQ number being mapped (or
         * unmapped) into the guest and let the VM fault handler
         * repopulate with the appropriate ESB pages (device or IC)
         */
        pr_debug("clearing esb pages for girq 0x%lx\n", irq);
        mutex_lock(&xive->mapping_lock);
        if (xive->mapping)
                unmap_mapping_range(xive->mapping,
                                    esb_pgoff << PAGE_SHIFT,
                                    2ull << PAGE_SHIFT, 1);
        mutex_unlock(&xive->mapping_lock);
        return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
        .reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;
        struct kvm_device *dev = vma->vm_file->private_data;
        struct kvmppc_xive *xive = dev->private;
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        u64 page;
        unsigned long irq;
        u64 page_offset;

        /*
         * Linux/KVM uses a two-page ESB setting, one page for trigger
         * and one for EOI
         */
        page_offset = vmf->pgoff - vma->vm_pgoff;
        irq = page_offset / 2;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb) {
                pr_devel("%s: source %lx not found !\n", __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        state = &sb->irq_state[src];
        kvmppc_xive_select_irq(state, &hw_num, &xd);

        arch_spin_lock(&sb->lock);

        /*
         * first/even page is for trigger
         * second/odd page is for EOI and management.
         */
        page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
        arch_spin_unlock(&sb->lock);

        if (WARN_ON(!page)) {
                pr_err("%s: accessing invalid ESB page for source %lx !\n",
                       __func__, irq);
                return VM_FAULT_SIGBUS;
        }

        vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
        return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
        .fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
        struct vm_area_struct *vma = vmf->vma;

        switch (vmf->pgoff - vma->vm_pgoff) {
        case 0: /* HW - forbid access */
        case 1: /* HV - forbid access */
                return VM_FAULT_SIGBUS;
        case 2: /* OS */
                vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
                return VM_FAULT_NOPAGE;
        case 3: /* USER - TODO */
        default:
                return VM_FAULT_SIGBUS;
        }
}

static const struct vm_operations_struct xive_native_tima_vmops = {
        .fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
                                   struct vm_area_struct *vma)
{
        struct kvmppc_xive *xive = dev->private;

        /* We only allow mappings at fixed offset for now */
        if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
                if (vma_pages(vma) > 4)
                        return -EINVAL;
                vma->vm_ops = &xive_native_tima_vmops;
        } else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
                if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
                        return -EINVAL;
                vma->vm_ops = &xive_native_esb_vmops;
        } else {
                return -EINVAL;
        }

        vma->vm_flags |= VM_IO | VM_PFNMAP;
        vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

        /*
         * Grab the KVM device file address_space to be able to clear
         * the ESB pages mapping when a device is passed-through into
         * the guest.
         */
        xive->mapping = vma->vm_file->f_mapping;
        return 0;
}
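
/*
 * Illustrative sketch (not part of the kernel build): how userspace is
 * expected to map the TIMA and ESB regions through the device fd, at the
 * fixed page offsets checked above. The page-size handling and variable
 * names are assumptions for the example.
 *
 *      long pgsz = sysconf(_SC_PAGESIZE);
 *
 *      // TIMA view: a 4-page region; only page 2 (OS) faults in
 *      void *tima = mmap(NULL, 4 * pgsz, PROT_READ | PROT_WRITE,
 *                        MAP_SHARED, xive_device_fd,
 *                        KVM_XIVE_TIMA_PAGE_OFFSET * pgsz);
 *
 *      // ESB space: two pages (trigger, EOI) per interrupt number
 *      void *esb = mmap(NULL, nr_irqs * 2 * pgsz, PROT_READ | PROT_WRITE,
 *                       MAP_SHARED, xive_device_fd,
 *                       KVM_XIVE_ESB_PAGE_OFFSET * pgsz);
 */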

static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
                                         u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u64 val;
        u16 idx;
        int rc;

        pr_devel("%s irq=0x%lx\n", __func__, irq);

        if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
                return -E2BIG;

        sb = kvmppc_xive_find_source(xive, irq, &idx);
        if (!sb) {
                pr_debug("No source, creating source block...\n");
                sb = kvmppc_xive_create_src_block(xive, irq);
                if (!sb) {
                        pr_err("Failed to create block...\n");
                        return -ENOMEM;
                }
        }
        state = &sb->irq_state[idx];

        if (get_user(val, ubufp)) {
                pr_err("fault getting user info !\n");
                return -EFAULT;
        }

        arch_spin_lock(&sb->lock);

        /*
         * If the source doesn't already have an IPI, allocate
         * one and get the corresponding data
         */
        if (!state->ipi_number) {
                state->ipi_number = xive_native_alloc_irq();
                if (state->ipi_number == 0) {
                        pr_err("Failed to allocate IRQ !\n");
                        rc = -ENXIO;
                        goto unlock;
                }
                xive_native_populate_irq_data(state->ipi_number,
                                              &state->ipi_data);
                pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
                         state->ipi_number, irq);
        }

        /* Restore LSI state */
        if (val & KVM_XIVE_LEVEL_SENSITIVE) {
                state->lsi = true;
                if (val & KVM_XIVE_LEVEL_ASSERTED)
                        state->asserted = true;
                pr_devel("  LSI ! Asserted=%d\n", state->asserted);
        }

        /* Mask IRQ to start with */
        state->act_server = 0;
        state->act_priority = MASKED;
        xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
        xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

        /* Increment the number of valid sources and mark this one valid */
        if (!state->valid)
                xive->src_count++;
        state->valid = true;

        rc = 0;

unlock:
        arch_spin_unlock(&sb->lock);

        return rc;
}
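
/*
 * Illustrative sketch (not part of the kernel build): creating a source
 * from userspace with the KVM_DEV_XIVE_GRP_SOURCE attribute group. The
 * girq value is a hypothetical guest IRQ number chosen for the example.
 *
 *      __u64 state = KVM_XIVE_LEVEL_SENSITIVE;    // 0 for an MSI source
 *      struct kvm_device_attr attr = {
 *              .group = KVM_DEV_XIVE_GRP_SOURCE,
 *              .attr  = girq,                     // guest IRQ number
 *              .addr  = (__u64)(uintptr_t)&state, // read with get_user()
 *      };
 *      if (ioctl(xive_device_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
 *              err(1, "KVM_DEV_XIVE_GRP_SOURCE");
 */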

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
                                        struct kvmppc_xive_src_block *sb,
                                        struct kvmppc_xive_irq_state *state,
                                        u32 server, u8 priority, bool masked,
                                        u32 eisn)
{
        struct kvm *kvm = xive->kvm;
        u32 hw_num;
        int rc = 0;

        arch_spin_lock(&sb->lock);

        if (state->act_server == server && state->act_priority == priority &&
            state->eisn == eisn)
                goto unlock;

        pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
                 priority, server, masked, state->act_server,
                 state->act_priority);

        kvmppc_xive_select_irq(state, &hw_num, NULL);

        if (priority != MASKED && !masked) {
                rc = kvmppc_xive_select_target(kvm, &server, priority);
                if (rc)
                        goto unlock;

                state->act_priority = priority;
                state->act_server = server;
                state->eisn = eisn;

                rc = xive_native_configure_irq(hw_num,
                                               kvmppc_xive_vp(xive, server),
                                               priority, eisn);
        } else {
                state->act_priority = MASKED;
                state->act_server = 0;
                state->eisn = 0;

                rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
        }

unlock:
        arch_spin_unlock(&sb->lock);
        return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
                                                long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        u64 __user *ubufp = (u64 __user *) addr;
        u16 src;
        u64 kvm_cfg;
        u32 server;
        u8 priority;
        bool masked;
        u32 eisn;

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        if (!state->valid)
                return -EINVAL;

        if (get_user(kvm_cfg, ubufp))
                return -EFAULT;

        pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

        priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
                KVM_XIVE_SOURCE_PRIORITY_SHIFT;
        server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
                KVM_XIVE_SOURCE_SERVER_SHIFT;
        masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
                KVM_XIVE_SOURCE_MASKED_SHIFT;
        eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
                KVM_XIVE_SOURCE_EISN_SHIFT;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }

        return kvmppc_xive_native_update_source_config(xive, sb, state, server,
                                                       priority, masked, eisn);
}
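
/*
 * Illustrative sketch (not part of the kernel build): packing the 64-bit
 * source configuration value that the code above demangles. A minimal
 * helper, assuming the UAPI shift macros from <asm/kvm.h>.
 *
 *      static __u64 xive_src_cfg(__u32 server, __u8 prio, bool masked,
 *                                __u32 eisn)
 *      {
 *              return ((__u64)prio << KVM_XIVE_SOURCE_PRIORITY_SHIFT) |
 *                     ((__u64)server << KVM_XIVE_SOURCE_SERVER_SHIFT) |
 *                     ((__u64)masked << KVM_XIVE_SOURCE_MASKED_SHIFT) |
 *                     ((__u64)eisn << KVM_XIVE_SOURCE_EISN_SHIFT);
 *      }
 */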

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
                                          long irq, u64 addr)
{
        struct kvmppc_xive_src_block *sb;
        struct kvmppc_xive_irq_state *state;
        struct xive_irq_data *xd;
        u32 hw_num;
        u16 src;
        int rc = 0;

        pr_devel("%s irq=0x%lx\n", __func__, irq);

        sb = kvmppc_xive_find_source(xive, irq, &src);
        if (!sb)
                return -ENOENT;

        state = &sb->irq_state[src];

        rc = -EINVAL;

        arch_spin_lock(&sb->lock);

        if (state->valid) {
                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                rc = 0;
        }

        arch_spin_unlock(&sb->lock);
        return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
        /*
         * We only support 64K pages for the moment. This is also
         * advertised in the DT property "ibm,xive-eq-sizes"
         */
        switch (qshift) {
        case 0: /* EQ reset */
        case 16:
                return 0;
        case 12:
        case 21:
        case 24:
        default:
                return -EINVAL;
        }
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        void __user *ubufp = (void __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        int rc;
        __be32 *qaddr = NULL;
        struct page *page;
        struct xive_q *q;
        gfn_t gfn;
        unsigned long page_size;
        int srcu_idx;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
                return -EFAULT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("Trying to restore invalid queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        /* reset queue and disable queueing */
        if (!kvm_eq.qshift) {
                q->guest_qaddr  = 0;
                q->guest_qshift = 0;

                rc = xive_native_configure_queue(xc->vp_id, q, priority,
                                                 NULL, 0, true);
                if (rc) {
                        pr_err("Failed to reset queue %d for VCPU %d: %d\n",
                               priority, xc->server_num, rc);
                        return rc;
                }

                if (q->qpage) {
                        put_page(virt_to_page(q->qpage));
                        q->qpage = NULL;
                }

                return 0;
        }

        /*
         * sPAPR specifies an "Unconditional Notify (n) flag" for the
         * H_INT_SET_QUEUE_CONFIG hcall which forces notification
         * without using the coalescing mechanisms provided by the
         * XIVE END ESBs. This is required on KVM as notification
         * using the END ESBs is not supported.
         */
        if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
                pr_err("invalid flags %d\n", kvm_eq.flags);
                return -EINVAL;
        }

        rc = xive_native_validate_queue_size(kvm_eq.qshift);
        if (rc) {
                pr_err("invalid queue size %d\n", kvm_eq.qshift);
                return rc;
        }

        if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
                pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
                       1ull << kvm_eq.qshift);
                return -EINVAL;
        }

        srcu_idx = srcu_read_lock(&kvm->srcu);
        gfn = gpa_to_gfn(kvm_eq.qaddr);
        page = gfn_to_page(kvm, gfn);
        if (is_error_page(page)) {
                srcu_read_unlock(&kvm->srcu, srcu_idx);
                pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
                return -EINVAL;
        }

        page_size = kvm_host_page_size(kvm, gfn);
        if (1ull << kvm_eq.qshift > page_size) {
                srcu_read_unlock(&kvm->srcu, srcu_idx);
                pr_warn("Incompatible host page size %lx!\n", page_size);
                return -EINVAL;
        }

        qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
        srcu_read_unlock(&kvm->srcu, srcu_idx);

        /*
         * Back up the queue page guest address so that the EQ page
         * can be marked dirty for migration.
         */
        q->guest_qaddr  = kvm_eq.qaddr;
        q->guest_qshift = kvm_eq.qshift;

        /*
         * Unconditional Notification is forced by default at the
         * OPAL level because the use of END ESBs is not supported by
         * Linux.
         */
        rc = xive_native_configure_queue(xc->vp_id, q, priority,
                                         (__be32 *) qaddr, kvm_eq.qshift, true);
        if (rc) {
                pr_err("Failed to configure queue %d for VCPU %d: %d\n",
                       priority, xc->server_num, rc);
                put_page(page);
                return rc;
        }

        /*
         * Only restore the queue state when needed. When doing the
         * H_INT_SET_SOURCE_CONFIG hcall, it should not.
         */
        if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
                rc = xive_native_set_queue_state(xc->vp_id, priority,
                                                 kvm_eq.qtoggle,
                                                 kvm_eq.qindex);
                if (rc)
                        goto error;
        }

        rc = kvmppc_xive_attach_escalation(vcpu, priority,
                                           xive->single_escalation);
error:
        if (rc)
                kvmppc_xive_native_cleanup_queue(vcpu, priority);
        return rc;
}
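
/*
 * Illustrative sketch (not part of the kernel build): configuring an EQ
 * from userspace. The attr value encodes the (server, priority) tuple
 * that the code above demangles; the queue page address is a hypothetical
 * guest RAM address chosen for the example.
 *
 *      struct kvm_ppc_xive_eq eq = {
 *              .flags   = KVM_XIVE_EQ_ALWAYS_NOTIFY, // mandatory on KVM
 *              .qshift  = 16,                        // 64K queue page
 *              .qaddr   = guest_qpage_addr,          // 64K-aligned GPA
 *              .qtoggle = 1,
 *              .qindex  = 0,
 *      };
 *      struct kvm_device_attr attr = {
 *              .group = KVM_DEV_XIVE_GRP_EQ_CONFIG,
 *              .attr  = (server << KVM_XIVE_EQ_SERVER_SHIFT) |
 *                       (priority << KVM_XIVE_EQ_PRIORITY_SHIFT),
 *              .addr  = (__u64)(uintptr_t)&eq,
 *      };
 *      ioctl(xive_device_fd, KVM_SET_DEVICE_ATTR, &attr);
 */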

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
                                               long eq_idx, u64 addr)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        struct kvmppc_xive_vcpu *xc;
        struct xive_q *q;
        void __user *ubufp = (void __user *) addr;
        u32 server;
        u8 priority;
        struct kvm_ppc_xive_eq kvm_eq;
        u64 qaddr;
        u64 qshift;
        u64 qeoi_page;
        u32 escalate_irq;
        u64 qflags;
        int rc;

        /*
         * Demangle priority/server tuple from the EQ identifier
         */
        priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
                KVM_XIVE_EQ_PRIORITY_SHIFT;
        server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
                KVM_XIVE_EQ_SERVER_SHIFT;

        vcpu = kvmppc_xive_find_server(kvm, server);
        if (!vcpu) {
                pr_err("Can't find server %d\n", server);
                return -ENOENT;
        }
        xc = vcpu->arch.xive_vcpu;

        if (priority != xive_prio_from_guest(priority)) {
                pr_err("invalid priority for queue %d for VCPU %d\n",
                       priority, server);
                return -EINVAL;
        }
        q = &xc->queues[priority];

        memset(&kvm_eq, 0, sizeof(kvm_eq));

        if (!q->qpage)
                return 0;

        rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
                                        &qeoi_page, &escalate_irq, &qflags);
        if (rc)
                return rc;

        kvm_eq.flags = 0;
        if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
                kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

        kvm_eq.qshift = q->guest_qshift;
        kvm_eq.qaddr  = q->guest_qaddr;

        rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
                                         &kvm_eq.qindex);
        if (rc)
                return rc;

        pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
                 __func__, server, priority, kvm_eq.flags,
                 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

        if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
                return -EFAULT;

        return 0;
}

static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
        int i;

        for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

                if (!state->valid)
                        continue;

                if (state->act_priority == MASKED)
                        continue;

                state->eisn = 0;
                state->act_server = 0;
                state->act_priority = MASKED;
                xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
                xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
                if (state->pt_number) {
                        xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
                        xive_native_configure_irq(state->pt_number,
                                                  0, MASKED, 0);
                }
        }
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        pr_devel("%s\n", __func__);

        mutex_lock(&xive->lock);

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
                unsigned int prio;

                if (!xc)
                        continue;

                kvmppc_xive_disable_vcpu_interrupts(vcpu);

                for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

                        /* Single escalation, no queue 7 */
                        if (prio == 7 && xive->single_escalation)
                                break;

                        if (xc->esc_virq[prio]) {
                                free_irq(xc->esc_virq[prio], vcpu);
                                irq_dispose_mapping(xc->esc_virq[prio]);
                                kfree(xc->esc_virq_names[prio]);
                                xc->esc_virq[prio] = 0;
                        }

                        kvmppc_xive_native_cleanup_queue(vcpu, prio);
                }
        }

        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_reset_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        mutex_unlock(&xive->lock);

        return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
        int j;

        for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
                struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
                struct xive_irq_data *xd;
                u32 hw_num;

                if (!state->valid)
                        continue;

                /*
                 * The struct kvmppc_xive_irq_state reflects the state
                 * of the EAS configuration and not the state of the
                 * source. The source is masked by setting the PQ bits
                 * to '-Q', which is what is done before calling the
                 * KVM_DEV_XIVE_EQ_SYNC control.
                 *
                 * If a source EAS is configured, OPAL syncs the XIVE
                 * IC of the source and the XIVE IC of the previous
                 * target, if any.
                 *
                 * So it should be fine to ignore MASKED sources, as
                 * they have been synced already.
                 */
                if (state->act_priority == MASKED)
                        continue;

                kvmppc_xive_select_irq(state, &hw_num, &xd);
                xive_native_sync_source(hw_num);
                xive_native_sync_queue(hw_num);
        }
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        unsigned int prio;
        int srcu_idx;

        if (!xc)
                return -ENOENT;

        for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
                struct xive_q *q = &xc->queues[prio];

                if (!q->qpage)
                        continue;

                /* Mark EQ page dirty for migration */
                srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
                srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
        }
        return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        pr_devel("%s\n", __func__);

        mutex_lock(&xive->lock);
        for (i = 0; i <= xive->max_sbid; i++) {
                struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

                if (sb) {
                        arch_spin_lock(&sb->lock);
                        kvmppc_xive_native_sync_sources(sb);
                        arch_spin_unlock(&sb->lock);
                }
        }

        kvm_for_each_vcpu(i, vcpu, kvm) {
                kvmppc_xive_native_vcpu_eq_sync(vcpu);
        }
        mutex_unlock(&xive->lock);

        return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                        return kvmppc_xive_reset(xive);
                case KVM_DEV_XIVE_EQ_SYNC:
                        return kvmppc_xive_native_eq_sync(xive);
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
                return kvmppc_xive_native_set_source(xive, attr->attr,
                                                     attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
                return kvmppc_xive_native_set_source_config(xive, attr->attr,
                                                            attr->addr);
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_set_queue_config(xive, attr->attr,
                                                           attr->addr);
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                return kvmppc_xive_native_sync_source(xive, attr->attr,
                                                      attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        struct kvmppc_xive *xive = dev->private;

        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return kvmppc_xive_native_get_queue_config(xive, attr->attr,
                                                           attr->addr);
        }
        return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
                                       struct kvm_device_attr *attr)
{
        switch (attr->group) {
        case KVM_DEV_XIVE_GRP_CTRL:
                switch (attr->attr) {
                case KVM_DEV_XIVE_RESET:
                case KVM_DEV_XIVE_EQ_SYNC:
                        return 0;
                }
                break;
        case KVM_DEV_XIVE_GRP_SOURCE:
        case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
        case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
                if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
                    attr->attr < KVMPPC_XIVE_NR_IRQS)
                        return 0;
                break;
        case KVM_DEV_XIVE_GRP_EQ_CONFIG:
                return 0;
        }
        return -ENXIO;
}

/*
 * Called when device fd is closed.  kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = dev->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        int i;

        pr_devel("Releasing xive native device\n");

        /*
         * Clear the KVM device file address_space which is used to
         * unmap the ESB pages when a device is passed-through.
         */
        mutex_lock(&xive->mapping_lock);
        xive->mapping = NULL;
        mutex_unlock(&xive->mapping_lock);

        /*
         * Since this is the device release function, we know that
         * userspace does not have any open fd or mmap referring to
         * the device.  Therefore none of the device attribute
         * set/get, mmap, or page fault functions can be executing
         * concurrently, and similarly, the connect_vcpu and
         * set/clr_mapped functions cannot be running either.
         */

        debugfs_remove(xive->dentry);

        /*
         * We should clean up the vCPU interrupt presenters first.
         */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                /*
                 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
                 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
                 * Holding the vcpu->mutex also means that the vcpu cannot
                 * be executing the KVM_RUN ioctl, and therefore it cannot
                 * be executing the XIVE push or pull code or accessing
                 * the XIVE MMIO regions.
                 */
                mutex_lock(&vcpu->mutex);
                kvmppc_xive_native_cleanup_vcpu(vcpu);
                mutex_unlock(&vcpu->mutex);
        }

        /*
         * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
         * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
         * against xive code getting called during vcpu execution or
         * set/get one_reg operations.
         */
        kvm->arch.xive = NULL;

        for (i = 0; i <= xive->max_sbid; i++) {
                if (xive->src_blocks[i])
                        kvmppc_xive_free_sources(xive->src_blocks[i]);
                kfree(xive->src_blocks[i]);
                xive->src_blocks[i] = NULL;
        }

        if (xive->vp_base != XIVE_INVALID_VP)
                xive_native_free_vp_block(xive->vp_base);

        /*
         * A reference of the kvmppc_xive pointer is now kept under
         * the xive_devices struct of the machine for reuse. It is
         * freed when the VM is destroyed for now until we fix all the
         * execution paths.
         */

        kfree(dev);
}

/*
 * Create a XIVE device.  kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
        struct kvmppc_xive *xive;
        struct kvm *kvm = dev->kvm;
        int ret = 0;

        pr_devel("Creating xive native device\n");

        if (kvm->arch.xive)
                return -EEXIST;

        xive = kvmppc_xive_get_device(kvm, type);
        if (!xive)
                return -ENOMEM;

        dev->private = xive;
        xive->dev = dev;
        xive->kvm = kvm;
        kvm->arch.xive = xive;
        mutex_init(&xive->mapping_lock);
        mutex_init(&xive->lock);

        /*
         * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
         * a default. Getting the max number of CPUs the VM was
         * configured with would improve our usage of the XIVE VP space.
         */
        xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
        pr_devel("VP_Base=%x\n", xive->vp_base);

        if (xive->vp_base == XIVE_INVALID_VP)
                ret = -ENXIO;

        xive->single_escalation = xive_native_has_single_escalation();
        xive->ops = &kvmppc_xive_native_ops;

        if (ret)
                kfree(xive);

        return ret;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        u64 opal_state;
        int rc;

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc)
                return -ENOENT;

        /* Thread context registers. We only care about IPB and CPPR */
        val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

        /* Get the VP state from OPAL */
        rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
        if (rc)
                return rc;

        /*
         * Capture the backup of IPB register in the NVT structure and
         * merge it in our KVM VP state.
         */
        val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

        pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
                 __func__,
                 vcpu->arch.xive_saved_state.nsr,
                 vcpu->arch.xive_saved_state.cppr,
                 vcpu->arch.xive_saved_state.ipb,
                 vcpu->arch.xive_saved_state.pipr,
                 vcpu->arch.xive_saved_state.w01,
                 (u32) vcpu->arch.xive_cam_word, opal_state);

        return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
        struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
        struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

        pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
                 val->xive_timaval[0], val->xive_timaval[1]);

        if (!kvmppc_xive_enabled(vcpu))
                return -EPERM;

        if (!xc || !xive)
                return -ENOENT;

        /* We can't update the state of a "pushed" VCPU */
        if (WARN_ON(vcpu->arch.xive_pushed))
                return -EBUSY;

        /*
         * Restore the thread context registers. IPB and CPPR should
         * be the only ones that matter.
         */
        vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

        /*
         * There is no need to restore the XIVE internal state (IPB
         * stored in the NVT) as the IPB register was merged in KVM VP
         * state when captured.
         */
        return 0;
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
        struct kvmppc_xive *xive = m->private;
        struct kvm *kvm = xive->kvm;
        struct kvm_vcpu *vcpu;
        unsigned int i;

        if (!kvm)
                return 0;

        seq_puts(m, "=========\nVCPU state\n=========\n");

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

                if (!xc)
                        continue;

                seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
                           xc->server_num,
                           vcpu->arch.xive_saved_state.nsr,
                           vcpu->arch.xive_saved_state.cppr,
                           vcpu->arch.xive_saved_state.ipb,
                           vcpu->arch.xive_saved_state.pipr,
                           vcpu->arch.xive_saved_state.w01,
                           (u32) vcpu->arch.xive_cam_word);

                kvmppc_xive_debug_show_queues(m, vcpu);
        }

        return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
        return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
        .open = xive_native_debug_open,
        .read = seq_read,
        .llseek = seq_lseek,
        .release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
        char *name;

        name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
        if (!name) {
                pr_err("%s: no memory for name\n", __func__);
                return;
        }

        xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
                                           xive, &xive_native_debug_fops);

        pr_debug("%s: created %s\n", __func__, name);
        kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
        struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

        /* Register some debug interfaces */
        xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
        .name = "kvm-xive-native",
        .create = kvmppc_xive_native_create,
        .init = kvmppc_xive_native_init,
        .release = kvmppc_xive_native_release,
        .set_attr = kvmppc_xive_native_set_attr,
        .get_attr = kvmppc_xive_native_get_attr,
        .has_attr = kvmppc_xive_native_has_attr,
        .mmap = kvmppc_xive_native_mmap,
};
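
/*
 * Illustrative sketch (not part of the kernel build): instantiating this
 * device from userspace with KVM_CREATE_DEVICE on the VM fd. The returned
 * fd is then used for the attribute and mmap operations shown above.
 *
 *      struct kvm_create_device cd = {
 *              .type = KVM_DEV_TYPE_XIVE,
 *      };
 *      if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
 *              err(1, "KVM_CREATE_DEVICE(KVM_DEV_TYPE_XIVE)");
 *      int xive_device_fd = cd.fd;
 */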

void kvmppc_xive_native_init_module(void)
{
        ;
}

void kvmppc_xive_native_exit_module(void)
{
        ;
}