KVM: PPC: Fix nested guest RC bits update
[linux-2.6-microblaze.git] / arch / powerpc / kvm / mpic.c
1 /*
2  * OpenPIC emulation
3  *
4  * Copyright (c) 2004 Jocelyn Mayer
5  *               2011 Alexander Graf
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25
26 #include <linux/slab.h>
27 #include <linux/mutex.h>
28 #include <linux/kvm_host.h>
29 #include <linux/errno.h>
30 #include <linux/fs.h>
31 #include <linux/anon_inodes.h>
32 #include <linux/uaccess.h>
33 #include <asm/mpic.h>
34 #include <asm/kvm_para.h>
35 #include <asm/kvm_ppc.h>
36 #include <kvm/iodev.h>
37
/*
 * Compile-time limits of the emulated controller.  These size the state
 * arrays in struct openpic: dst[MAX_CPU], src[MAX_IRQ], timers[MAX_TMR]
 * and msi[MAX_MSI].  MAX_IRQ is the sum of the MAX_SRC, MAX_IPI and
 * MAX_TMR slots.
 */
#define MAX_CPU     32
#define MAX_SRC     256
#define MAX_TMR     4
#define MAX_IPI     4
#define MAX_MSI     8
#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
#define VID         0x03        /* MPIC version ID */
45
/*
 * OpenPIC capability flags, tested against openpic.flags.
 *
 * IDR_CRIT: IDR writes honour the critical-interrupt and EP bits.
 * ILR:      the per-source ILR register is readable and writable.
 */
#define OPENPIC_FLAG_IDR_CRIT     (1 << 0)
#define OPENPIC_FLAG_ILR          (1 << 1)
49
/*
 * OpenPIC address map: start offset and byte size of each register
 * region.  The source region is sized at 0x20 bytes per source, and the
 * per-CPU region at a 0x1000 stride per CPU with a final 0x100-byte
 * window for the last CPU.
 */
#define OPENPIC_REG_SIZE             0x40000
#define OPENPIC_GLB_REG_START        0x0
#define OPENPIC_GLB_REG_SIZE         0x10F0
#define OPENPIC_TMR_REG_START        0x10F0
#define OPENPIC_TMR_REG_SIZE         0x220
#define OPENPIC_MSI_REG_START        0x1600
#define OPENPIC_MSI_REG_SIZE         0x200
#define OPENPIC_SUMMARY_REG_START    0x3800
#define OPENPIC_SUMMARY_REG_SIZE     0x800
#define OPENPIC_SRC_REG_START        0x10000
#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
#define OPENPIC_CPU_REG_START        0x20000
#define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))
64
/*
 * Per-model parameters for the Freescale MPIC variants; struct openpic
 * keeps a pointer to one of these in its 'fsl' member.
 */
struct fsl_mpic_info {
        /* NOTE(review): presumably the number of external sources -- confirm */
        int max_ext;
};

/* Parameters selected for the "20" model variant. */
static struct fsl_mpic_info fsl_mpic_20 = {
        .max_ext = 12,
};

/* Parameters selected for the "42" model variant. */
static struct fsl_mpic_info fsl_mpic_42 = {
        .max_ext = 12,
};
76
/* Feature reporting register (FRR) field positions */
#define FRR_NIRQ_SHIFT    16
#define FRR_NCPU_SHIFT     8
#define FRR_VID_SHIFT      0

#define VID_REVISION_1_2   2
#define VID_REVISION_1_3   3

#define VIR_GENERIC      0x00000000     /* Generic Vendor ID */

/* Global configuration register (GCR) bits */
#define GCR_RESET        0x80000000
#define GCR_MODE_PASS    0x00000000
#define GCR_MODE_MIXED   0x20000000
#define GCR_MODE_PROXY   0x60000000

#define TBCR_CI           0x80000000    /* count inhibit */
#define TCCR_TOG          0x80000000    /* toggles when decrement to zero */

/*
 * Destination register (IDR) bit positions.  The mask is built from an
 * unsigned constant: shifting a signed 1 into bit 31 overflows int, which
 * is undefined behaviour in C.
 */
#define IDR_EP_SHIFT      31
#define IDR_EP_MASK       (1U << IDR_EP_SHIFT)
#define IDR_CI0_SHIFT     30
#define IDR_CI1_SHIFT     29
#define IDR_P1_SHIFT      1
#define IDR_P0_SHIFT      0

/* Interrupt level register (ILR) target field and its known targets */
#define ILR_INTTGT_MASK   0x000000ff
#define ILR_INTTGT_INT    0x00
#define ILR_INTTGT_CINT   0x01  /* critical */
#define ILR_INTTGT_MCP    0x02  /* machine check */
#define NUM_OUTPUTS       3

/*
 * MSI index register (MSIIR) fields.  Unsigned constants are used because
 * 0x7 << 29 does not fit in a signed int.
 */
#define MSIIR_OFFSET       0x140
#define MSIIR_SRS_SHIFT    29
#define MSIIR_SRS_MASK     (0x7U << MSIIR_SRS_SHIFT)
#define MSIIR_IBS_SHIFT    24
#define MSIIR_IBS_MASK     (0x1fU << MSIIR_IBS_SHIFT)
112
/*
 * Return the irq_cpu_id of the vcpu attached to the current task, or -1
 * when the task has no vcpu (or when the code is built without both
 * CONFIG_KVM and CONFIG_BOOKE).
 */
static int get_current_cpu(void)
{
        int cpu = -1;   /* XXX: only meaningful with KVM on BookE */

#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
        struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;

        if (vcpu)
                cpu = vcpu->arch.irq_cpu_id;
#endif

        return cpu;
}
123
/*
 * Forward declarations for helpers that are called before their
 * definitions: the per-CPU register accessors (used by the global
 * register handlers) and the IDR writer (used by openpic_reset()).
 */
static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
                                      u32 val, int idx);
static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
                                     u32 *ptr, int idx);
static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
                                    uint32_t val);
130
/*
 * Kind of interrupt source.  The type decides how the sense and polarity
 * bits of IVPR are treated when the register is reset or written.
 */
enum irq_type {
        IRQ_TYPE_NORMAL = 0,
        IRQ_TYPE_FSLINT,        /* FSL internal interrupt -- level only */
        IRQ_TYPE_FSLSPECIAL,    /* FSL timer/IPI interrupt, edge, no polarity */
};
136
/*
 * Bitmap of IRQ numbers plus a cached summary.  IRQ_check() rescans the
 * bitmap and stores the highest-priority set IRQ in 'next' and its
 * priority in 'priority'; both are -1 when no bit is set.
 */
struct irq_queue {
        /* Round up to the nearest 64 IRQs so that the queue length
         * won't change when moving between 32 and 64 bit hosts.
         */
        unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) & ~63)];
        int next;
        int priority;
};
145
/*
 * State of one interrupt source.  'destmask', 'output' and 'nomask' are
 * derived from the IDR (and, where implemented, ILR) register writes;
 * 'last_cpu' records the CPU chosen by the most recent distributed
 * delivery.
 */
struct irq_source {
        uint32_t ivpr;          /* IRQ vector/priority register */
        uint32_t idr;           /* IRQ destination register */
        uint32_t destmask;      /* bitmap of CPU destinations */
        int last_cpu;
        int output;             /* IRQ level, e.g. ILR_INTTGT_INT */
        int pending;            /* TRUE if IRQ is pending */
        enum irq_type type;
        bool level:1;           /* level-triggered */
        bool nomask:1;  /* critical interrupts ignore mask on some FSL MPICs */
};
157
/*
 * IVPR (vector/priority register) bit fields.  The single-bit masks are
 * built from unsigned constants: 1 << 31 overflows a signed int, which is
 * undefined behaviour in C, and every user applies them to uint32_t
 * register values anyway.
 */
#define IVPR_MASK_SHIFT       31
#define IVPR_MASK_MASK        (1U << IVPR_MASK_SHIFT)
#define IVPR_ACTIVITY_SHIFT   30
#define IVPR_ACTIVITY_MASK    (1U << IVPR_ACTIVITY_SHIFT)
#define IVPR_MODE_SHIFT       29
#define IVPR_MODE_MASK        (1U << IVPR_MODE_SHIFT)
#define IVPR_POLARITY_SHIFT   23
#define IVPR_POLARITY_MASK    (1U << IVPR_POLARITY_SHIFT)
#define IVPR_SENSE_SHIFT      22
#define IVPR_SENSE_MASK       (1U << IVPR_SENSE_SHIFT)

/* Priority is a 4-bit field at bits 19..16; IVPR_PRIORITY() yields 0..15. */
#define IVPR_PRIORITY_MASK     (0xFU << 16)
#define IVPR_PRIORITY(_ivprr_) ((int)(((_ivprr_) & IVPR_PRIORITY_MASK) >> 16))
/* Vector bits are model dependent and selected by opp->vector_mask. */
#define IVPR_VECTOR(opp, _ivprr_) ((_ivprr_) & (opp)->vector_mask)

/* IDR[EP/CI] are only for FSL MPIC prior to v4.0 */
#define IDR_EP      0x80000000  /* external pin */
#define IDR_CI      0x40000000  /* critical interrupt */
176
/*
 * Per-CPU destination state: the vcpu interrupts are delivered to (NULL
 * when no vcpu is attached), its current task priority, and the queues
 * of raised and in-service IRQs.
 */
struct irq_dest {
        struct kvm_vcpu *vcpu;

        int32_t ctpr;           /* CPU current task priority */
        struct irq_queue raised;
        struct irq_queue servicing;

        /* Count of IRQ sources asserting on non-INT outputs */
        uint32_t outputs_active[NUM_OUTPUTS];
};
187
/* Capacity of the mmio_regions[] table in struct openpic. */
#define MAX_MMIO_REGIONS 10

/*
 * Complete state of one emulated OpenPIC/MPIC instance: KVM plumbing,
 * model-specific behaviour knobs, the global registers, and the
 * per-source, per-CPU, timer and MSI register state.
 */
struct openpic {
        struct kvm *kvm;
        struct kvm_device *dev;
        struct kvm_io_device mmio;
        const struct mem_reg *mmio_regions[MAX_MMIO_REGIONS];
        int num_mmio_regions;

        gpa_t reg_base;
        spinlock_t lock;

        /* Behavior control */
        struct fsl_mpic_info *fsl;
        uint32_t model;
        uint32_t flags;         /* OPENPIC_FLAG_* capability bits */
        uint32_t nb_irqs;
        uint32_t vid;
        uint32_t vir;           /* Vendor identification register */
        uint32_t vector_mask;   /* writable vector bits of IVPR/SPVE */
        uint32_t tfrr_reset;    /* value loaded into tfrr on reset */
        uint32_t ivpr_reset;    /* value loaded into every IVPR on reset */
        uint32_t idr_reset;     /* value written to every IDR on reset */
        uint32_t brr1;
        uint32_t mpic_mode_mask;        /* GCR bits a guest write may change */

        /* Global registers */
        uint32_t frr;           /* Feature reporting register */
        uint32_t gcr;           /* Global configuration register  */
        uint32_t pir;           /* Processor initialization register */
        uint32_t spve;          /* Spurious vector register */
        uint32_t tfrr;          /* Timer frequency reporting register */
        /* Source registers */
        struct irq_source src[MAX_IRQ];
        /* Local registers per output pin */
        struct irq_dest dst[MAX_CPU];
        uint32_t nb_cpus;
        /* Timer registers */
        struct {
                uint32_t tccr;  /* Global timer current count register */
                uint32_t tbcr;  /* Global timer base count register */
        } timers[MAX_TMR];
        /* Shared MSI registers */
        struct {
                uint32_t msir;  /* Shared Message Signaled Interrupt Register */
        } msi[MAX_MSI];
        uint32_t max_irq;       /* number of src[] entries in use */
        uint32_t irq_ipi0;      /* src[] index of the first IPI source */
        uint32_t irq_tim0;      /* src[] index of the first timer source */
        uint32_t irq_msi;
};
239
240
241 static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
242                            int output)
243 {
244         struct kvm_interrupt irq = {
245                 .irq = KVM_INTERRUPT_SET_LEVEL,
246         };
247
248         if (!dst->vcpu) {
249                 pr_debug("%s: destination cpu %d does not exist\n",
250                          __func__, (int)(dst - &opp->dst[0]));
251                 return;
252         }
253
254         pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
255                 output);
256
257         if (output != ILR_INTTGT_INT)   /* TODO */
258                 return;
259
260         kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
261 }
262
263 static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
264                            int output)
265 {
266         if (!dst->vcpu) {
267                 pr_debug("%s: destination cpu %d does not exist\n",
268                          __func__, (int)(dst - &opp->dst[0]));
269                 return;
270         }
271
272         pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
273                 output);
274
275         if (output != ILR_INTTGT_INT)   /* TODO */
276                 return;
277
278         kvmppc_core_dequeue_external(dst->vcpu);
279 }
280
281 static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
282 {
283         set_bit(n_IRQ, q->queue);
284 }
285
286 static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
287 {
288         clear_bit(n_IRQ, q->queue);
289 }
290
291 static void IRQ_check(struct openpic *opp, struct irq_queue *q)
292 {
293         int irq = -1;
294         int next = -1;
295         int priority = -1;
296
297         for (;;) {
298                 irq = find_next_bit(q->queue, opp->max_irq, irq + 1);
299                 if (irq == opp->max_irq)
300                         break;
301
302                 pr_debug("IRQ_check: irq %d set ivpr_pr=%d pr=%d\n",
303                         irq, IVPR_PRIORITY(opp->src[irq].ivpr), priority);
304
305                 if (IVPR_PRIORITY(opp->src[irq].ivpr) > priority) {
306                         next = irq;
307                         priority = IVPR_PRIORITY(opp->src[irq].ivpr);
308                 }
309         }
310
311         q->next = next;
312         q->priority = priority;
313 }
314
315 static int IRQ_get_next(struct openpic *opp, struct irq_queue *q)
316 {
317         /* XXX: optimize */
318         IRQ_check(opp, q);
319
320         return q->next;
321 }
322
323 static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
324                            bool active, bool was_active)
325 {
326         struct irq_dest *dst;
327         struct irq_source *src;
328         int priority;
329
330         dst = &opp->dst[n_CPU];
331         src = &opp->src[n_IRQ];
332
333         pr_debug("%s: IRQ %d active %d was %d\n",
334                 __func__, n_IRQ, active, was_active);
335
336         if (src->output != ILR_INTTGT_INT) {
337                 pr_debug("%s: output %d irq %d active %d was %d count %d\n",
338                         __func__, src->output, n_IRQ, active, was_active,
339                         dst->outputs_active[src->output]);
340
341                 /* On Freescale MPIC, critical interrupts ignore priority,
342                  * IACK, EOI, etc.  Before MPIC v4.1 they also ignore
343                  * masking.
344                  */
345                 if (active) {
346                         if (!was_active &&
347                             dst->outputs_active[src->output]++ == 0) {
348                                 pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
349                                         __func__, src->output, n_CPU, n_IRQ);
350                                 mpic_irq_raise(opp, dst, src->output);
351                         }
352                 } else {
353                         if (was_active &&
354                             --dst->outputs_active[src->output] == 0) {
355                                 pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
356                                         __func__, src->output, n_CPU, n_IRQ);
357                                 mpic_irq_lower(opp, dst, src->output);
358                         }
359                 }
360
361                 return;
362         }
363
364         priority = IVPR_PRIORITY(src->ivpr);
365
366         /* Even if the interrupt doesn't have enough priority,
367          * it is still raised, in case ctpr is lowered later.
368          */
369         if (active)
370                 IRQ_setbit(&dst->raised, n_IRQ);
371         else
372                 IRQ_resetbit(&dst->raised, n_IRQ);
373
374         IRQ_check(opp, &dst->raised);
375
376         if (active && priority <= dst->ctpr) {
377                 pr_debug("%s: IRQ %d priority %d too low for ctpr %d on CPU %d\n",
378                         __func__, n_IRQ, priority, dst->ctpr, n_CPU);
379                 active = 0;
380         }
381
382         if (active) {
383                 if (IRQ_get_next(opp, &dst->servicing) >= 0 &&
384                     priority <= dst->servicing.priority) {
385                         pr_debug("%s: IRQ %d is hidden by servicing IRQ %d on CPU %d\n",
386                                 __func__, n_IRQ, dst->servicing.next, n_CPU);
387                 } else {
388                         pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
389                                 __func__, n_CPU, n_IRQ, dst->raised.next);
390                         mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
391                 }
392         } else {
393                 IRQ_get_next(opp, &dst->servicing);
394                 if (dst->raised.priority > dst->ctpr &&
395                     dst->raised.priority > dst->servicing.priority) {
396                         pr_debug("%s: IRQ %d inactive, IRQ %d prio %d above %d/%d, CPU %d\n",
397                                 __func__, n_IRQ, dst->raised.next,
398                                 dst->raised.priority, dst->ctpr,
399                                 dst->servicing.priority, n_CPU);
400                         /* IRQ line stays asserted */
401                 } else {
402                         pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
403                                 __func__, n_IRQ, dst->ctpr,
404                                 dst->servicing.priority, n_CPU);
405                         mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
406                 }
407         }
408 }
409
410 /* update pic state because registers for n_IRQ have changed value */
411 static void openpic_update_irq(struct openpic *opp, int n_IRQ)
412 {
413         struct irq_source *src;
414         bool active, was_active;
415         int i;
416
417         src = &opp->src[n_IRQ];
418         active = src->pending;
419
420         if ((src->ivpr & IVPR_MASK_MASK) && !src->nomask) {
421                 /* Interrupt source is disabled */
422                 pr_debug("%s: IRQ %d is disabled\n", __func__, n_IRQ);
423                 active = false;
424         }
425
426         was_active = !!(src->ivpr & IVPR_ACTIVITY_MASK);
427
428         /*
429          * We don't have a similar check for already-active because
430          * ctpr may have changed and we need to withdraw the interrupt.
431          */
432         if (!active && !was_active) {
433                 pr_debug("%s: IRQ %d is already inactive\n", __func__, n_IRQ);
434                 return;
435         }
436
437         if (active)
438                 src->ivpr |= IVPR_ACTIVITY_MASK;
439         else
440                 src->ivpr &= ~IVPR_ACTIVITY_MASK;
441
442         if (src->destmask == 0) {
443                 /* No target */
444                 pr_debug("%s: IRQ %d has no target\n", __func__, n_IRQ);
445                 return;
446         }
447
448         if (src->destmask == (1 << src->last_cpu)) {
449                 /* Only one CPU is allowed to receive this IRQ */
450                 IRQ_local_pipe(opp, src->last_cpu, n_IRQ, active, was_active);
451         } else if (!(src->ivpr & IVPR_MODE_MASK)) {
452                 /* Directed delivery mode */
453                 for (i = 0; i < opp->nb_cpus; i++) {
454                         if (src->destmask & (1 << i)) {
455                                 IRQ_local_pipe(opp, i, n_IRQ, active,
456                                                was_active);
457                         }
458                 }
459         } else {
460                 /* Distributed delivery mode */
461                 for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
462                         if (i == opp->nb_cpus)
463                                 i = 0;
464
465                         if (src->destmask & (1 << i)) {
466                                 IRQ_local_pipe(opp, i, n_IRQ, active,
467                                                was_active);
468                                 src->last_cpu = i;
469                                 break;
470                         }
471                 }
472         }
473 }
474
475 static void openpic_set_irq(void *opaque, int n_IRQ, int level)
476 {
477         struct openpic *opp = opaque;
478         struct irq_source *src;
479
480         if (n_IRQ >= MAX_IRQ) {
481                 WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
482                 return;
483         }
484
485         src = &opp->src[n_IRQ];
486         pr_debug("openpic: set irq %d = %d ivpr=0x%08x\n",
487                 n_IRQ, level, src->ivpr);
488         if (src->level) {
489                 /* level-sensitive irq */
490                 src->pending = level;
491                 openpic_update_irq(opp, n_IRQ);
492         } else {
493                 /* edge-sensitive irq */
494                 if (level) {
495                         src->pending = 1;
496                         openpic_update_irq(opp, n_IRQ);
497                 }
498
499                 if (src->output != ILR_INTTGT_INT) {
500                         /* Edge-triggered interrupts shouldn't be used
501                          * with non-INT delivery, but just in case,
502                          * try to make it do something sane rather than
503                          * cause an interrupt storm.  This is close to
504                          * what you'd probably see happen in real hardware.
505                          */
506                         src->pending = 0;
507                         openpic_update_irq(opp, n_IRQ);
508                 }
509         }
510 }
511
512 static void openpic_reset(struct openpic *opp)
513 {
514         int i;
515
516         opp->gcr = GCR_RESET;
517         /* Initialise controller registers */
518         opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
519             (opp->vid << FRR_VID_SHIFT);
520
521         opp->pir = 0;
522         opp->spve = -1 & opp->vector_mask;
523         opp->tfrr = opp->tfrr_reset;
524         /* Initialise IRQ sources */
525         for (i = 0; i < opp->max_irq; i++) {
526                 opp->src[i].ivpr = opp->ivpr_reset;
527
528                 switch (opp->src[i].type) {
529                 case IRQ_TYPE_NORMAL:
530                         opp->src[i].level =
531                             !!(opp->ivpr_reset & IVPR_SENSE_MASK);
532                         break;
533
534                 case IRQ_TYPE_FSLINT:
535                         opp->src[i].ivpr |= IVPR_POLARITY_MASK;
536                         break;
537
538                 case IRQ_TYPE_FSLSPECIAL:
539                         break;
540                 }
541
542                 write_IRQreg_idr(opp, i, opp->idr_reset);
543         }
544         /* Initialise IRQ destinations */
545         for (i = 0; i < MAX_CPU; i++) {
546                 opp->dst[i].ctpr = 15;
547                 memset(&opp->dst[i].raised, 0, sizeof(struct irq_queue));
548                 opp->dst[i].raised.next = -1;
549                 memset(&opp->dst[i].servicing, 0, sizeof(struct irq_queue));
550                 opp->dst[i].servicing.next = -1;
551         }
552         /* Initialise timers */
553         for (i = 0; i < MAX_TMR; i++) {
554                 opp->timers[i].tccr = 0;
555                 opp->timers[i].tbcr = TBCR_CI;
556         }
557         /* Go out of RESET state */
558         opp->gcr = 0;
559 }
560
561 static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
562 {
563         return opp->src[n_IRQ].idr;
564 }
565
566 static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
567 {
568         if (opp->flags & OPENPIC_FLAG_ILR)
569                 return opp->src[n_IRQ].output;
570
571         return 0xffffffff;
572 }
573
574 static inline uint32_t read_IRQreg_ivpr(struct openpic *opp, int n_IRQ)
575 {
576         return opp->src[n_IRQ].ivpr;
577 }
578
579 static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
580                                     uint32_t val)
581 {
582         struct irq_source *src = &opp->src[n_IRQ];
583         uint32_t normal_mask = (1UL << opp->nb_cpus) - 1;
584         uint32_t crit_mask = 0;
585         uint32_t mask = normal_mask;
586         int crit_shift = IDR_EP_SHIFT - opp->nb_cpus;
587         int i;
588
589         if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
590                 crit_mask = mask << crit_shift;
591                 mask |= crit_mask | IDR_EP;
592         }
593
594         src->idr = val & mask;
595         pr_debug("Set IDR %d to 0x%08x\n", n_IRQ, src->idr);
596
597         if (opp->flags & OPENPIC_FLAG_IDR_CRIT) {
598                 if (src->idr & crit_mask) {
599                         if (src->idr & normal_mask) {
600                                 pr_debug("%s: IRQ configured for multiple output types, using critical\n",
601                                         __func__);
602                         }
603
604                         src->output = ILR_INTTGT_CINT;
605                         src->nomask = true;
606                         src->destmask = 0;
607
608                         for (i = 0; i < opp->nb_cpus; i++) {
609                                 int n_ci = IDR_CI0_SHIFT - i;
610
611                                 if (src->idr & (1UL << n_ci))
612                                         src->destmask |= 1UL << i;
613                         }
614                 } else {
615                         src->output = ILR_INTTGT_INT;
616                         src->nomask = false;
617                         src->destmask = src->idr & normal_mask;
618                 }
619         } else {
620                 src->destmask = src->idr;
621         }
622 }
623
624 static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
625                                     uint32_t val)
626 {
627         if (opp->flags & OPENPIC_FLAG_ILR) {
628                 struct irq_source *src = &opp->src[n_IRQ];
629
630                 src->output = val & ILR_INTTGT_MASK;
631                 pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
632                         src->output);
633
634                 /* TODO: on MPIC v4.0 only, set nomask for non-INT */
635         }
636 }
637
638 static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
639                                      uint32_t val)
640 {
641         uint32_t mask;
642
643         /* NOTE when implementing newer FSL MPIC models: starting with v4.0,
644          * the polarity bit is read-only on internal interrupts.
645          */
646         mask = IVPR_MASK_MASK | IVPR_PRIORITY_MASK | IVPR_SENSE_MASK |
647             IVPR_POLARITY_MASK | opp->vector_mask;
648
649         /* ACTIVITY bit is read-only */
650         opp->src[n_IRQ].ivpr =
651             (opp->src[n_IRQ].ivpr & IVPR_ACTIVITY_MASK) | (val & mask);
652
653         /* For FSL internal interrupts, The sense bit is reserved and zero,
654          * and the interrupt is always level-triggered.  Timers and IPIs
655          * have no sense or polarity bits, and are edge-triggered.
656          */
657         switch (opp->src[n_IRQ].type) {
658         case IRQ_TYPE_NORMAL:
659                 opp->src[n_IRQ].level =
660                     !!(opp->src[n_IRQ].ivpr & IVPR_SENSE_MASK);
661                 break;
662
663         case IRQ_TYPE_FSLINT:
664                 opp->src[n_IRQ].ivpr &= ~IVPR_SENSE_MASK;
665                 break;
666
667         case IRQ_TYPE_FSLSPECIAL:
668                 opp->src[n_IRQ].ivpr &= ~(IVPR_POLARITY_MASK | IVPR_SENSE_MASK);
669                 break;
670         }
671
672         openpic_update_irq(opp, n_IRQ);
673         pr_debug("Set IVPR %d to 0x%08x -> 0x%08x\n", n_IRQ, val,
674                 opp->src[n_IRQ].ivpr);
675 }
676
677 static void openpic_gcr_write(struct openpic *opp, uint64_t val)
678 {
679         if (val & GCR_RESET) {
680                 openpic_reset(opp);
681                 return;
682         }
683
684         opp->gcr &= ~opp->mpic_mode_mask;
685         opp->gcr |= val & opp->mpic_mode_mask;
686 }
687
688 static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
689 {
690         struct openpic *opp = opaque;
691         int err = 0;
692
693         pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
694         if (addr & 0xF)
695                 return 0;
696
697         switch (addr) {
698         case 0x00:      /* Block Revision Register1 (BRR1) is Readonly */
699                 break;
700         case 0x40:
701         case 0x50:
702         case 0x60:
703         case 0x70:
704         case 0x80:
705         case 0x90:
706         case 0xA0:
707         case 0xB0:
708                 err = openpic_cpu_write_internal(opp, addr, val,
709                                                  get_current_cpu());
710                 break;
711         case 0x1000:            /* FRR */
712                 break;
713         case 0x1020:            /* GCR */
714                 openpic_gcr_write(opp, val);
715                 break;
716         case 0x1080:            /* VIR */
717                 break;
718         case 0x1090:            /* PIR */
719                 /*
720                  * This register is used to reset a CPU core --
721                  * let userspace handle it.
722                  */
723                 err = -ENXIO;
724                 break;
725         case 0x10A0:            /* IPI_IVPR */
726         case 0x10B0:
727         case 0x10C0:
728         case 0x10D0: {
729                 int idx;
730                 idx = (addr - 0x10A0) >> 4;
731                 write_IRQreg_ivpr(opp, opp->irq_ipi0 + idx, val);
732                 break;
733         }
734         case 0x10E0:            /* SPVE */
735                 opp->spve = val & opp->vector_mask;
736                 break;
737         default:
738                 break;
739         }
740
741         return err;
742 }
743
744 static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
745 {
746         struct openpic *opp = opaque;
747         u32 retval;
748         int err = 0;
749
750         pr_debug("%s: addr %#llx\n", __func__, addr);
751         retval = 0xFFFFFFFF;
752         if (addr & 0xF)
753                 goto out;
754
755         switch (addr) {
756         case 0x1000:            /* FRR */
757                 retval = opp->frr;
758                 retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
759                 break;
760         case 0x1020:            /* GCR */
761                 retval = opp->gcr;
762                 break;
763         case 0x1080:            /* VIR */
764                 retval = opp->vir;
765                 break;
766         case 0x1090:            /* PIR */
767                 retval = 0x00000000;
768                 break;
769         case 0x00:              /* Block Revision Register1 (BRR1) */
770                 retval = opp->brr1;
771                 break;
772         case 0x40:
773         case 0x50:
774         case 0x60:
775         case 0x70:
776         case 0x80:
777         case 0x90:
778         case 0xA0:
779         case 0xB0:
780                 err = openpic_cpu_read_internal(opp, addr,
781                         &retval, get_current_cpu());
782                 break;
783         case 0x10A0:            /* IPI_IVPR */
784         case 0x10B0:
785         case 0x10C0:
786         case 0x10D0:
787                 {
788                         int idx;
789                         idx = (addr - 0x10A0) >> 4;
790                         retval = read_IRQreg_ivpr(opp, opp->irq_ipi0 + idx);
791                 }
792                 break;
793         case 0x10E0:            /* SPVE */
794                 retval = opp->spve;
795                 break;
796         default:
797                 break;
798         }
799
800 out:
801         pr_debug("%s: => 0x%08x\n", __func__, retval);
802         *ptr = retval;
803         return err;
804 }
805
806 static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
807 {
808         struct openpic *opp = opaque;
809         int idx;
810
811         addr += 0x10f0;
812
813         pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
814         if (addr & 0xF)
815                 return 0;
816
817         if (addr == 0x10f0) {
818                 /* TFRR */
819                 opp->tfrr = val;
820                 return 0;
821         }
822
823         idx = (addr >> 6) & 0x3;
824         addr = addr & 0x30;
825
826         switch (addr & 0x30) {
827         case 0x00:              /* TCCR */
828                 break;
829         case 0x10:              /* TBCR */
830                 if ((opp->timers[idx].tccr & TCCR_TOG) != 0 &&
831                     (val & TBCR_CI) == 0 &&
832                     (opp->timers[idx].tbcr & TBCR_CI) != 0)
833                         opp->timers[idx].tccr &= ~TCCR_TOG;
834
835                 opp->timers[idx].tbcr = val;
836                 break;
837         case 0x20:              /* TVPR */
838                 write_IRQreg_ivpr(opp, opp->irq_tim0 + idx, val);
839                 break;
840         case 0x30:              /* TDR */
841                 write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
842                 break;
843         }
844
845         return 0;
846 }
847
848 static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
849 {
850         struct openpic *opp = opaque;
851         uint32_t retval = -1;
852         int idx;
853
854         pr_debug("%s: addr %#llx\n", __func__, addr);
855         if (addr & 0xF)
856                 goto out;
857
858         idx = (addr >> 6) & 0x3;
859         if (addr == 0x0) {
860                 /* TFRR */
861                 retval = opp->tfrr;
862                 goto out;
863         }
864
865         switch (addr & 0x30) {
866         case 0x00:              /* TCCR */
867                 retval = opp->timers[idx].tccr;
868                 break;
869         case 0x10:              /* TBCR */
870                 retval = opp->timers[idx].tbcr;
871                 break;
872         case 0x20:              /* TIPV */
873                 retval = read_IRQreg_ivpr(opp, opp->irq_tim0 + idx);
874                 break;
875         case 0x30:              /* TIDE (TIDR) */
876                 retval = read_IRQreg_idr(opp, opp->irq_tim0 + idx);
877                 break;
878         }
879
880 out:
881         pr_debug("%s: => 0x%08x\n", __func__, retval);
882         *ptr = retval;
883         return 0;
884 }
885
886 static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
887 {
888         struct openpic *opp = opaque;
889         int idx;
890
891         pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
892
893         addr = addr & 0xffff;
894         idx = addr >> 5;
895
896         switch (addr & 0x1f) {
897         case 0x00:
898                 write_IRQreg_ivpr(opp, idx, val);
899                 break;
900         case 0x10:
901                 write_IRQreg_idr(opp, idx, val);
902                 break;
903         case 0x18:
904                 write_IRQreg_ilr(opp, idx, val);
905                 break;
906         }
907
908         return 0;
909 }
910
911 static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
912 {
913         struct openpic *opp = opaque;
914         uint32_t retval;
915         int idx;
916
917         pr_debug("%s: addr %#llx\n", __func__, addr);
918         retval = 0xFFFFFFFF;
919
920         addr = addr & 0xffff;
921         idx = addr >> 5;
922
923         switch (addr & 0x1f) {
924         case 0x00:
925                 retval = read_IRQreg_ivpr(opp, idx);
926                 break;
927         case 0x10:
928                 retval = read_IRQreg_idr(opp, idx);
929                 break;
930         case 0x18:
931                 retval = read_IRQreg_ilr(opp, idx);
932                 break;
933         }
934
935         pr_debug("%s: => 0x%08x\n", __func__, retval);
936         *ptr = retval;
937         return 0;
938 }
939
940 static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
941 {
942         struct openpic *opp = opaque;
943         int idx = opp->irq_msi;
944         int srs, ibs;
945
946         pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
947         if (addr & 0xF)
948                 return 0;
949
950         switch (addr) {
951         case MSIIR_OFFSET:
952                 srs = val >> MSIIR_SRS_SHIFT;
953                 idx += srs;
954                 ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
955                 opp->msi[srs].msir |= 1 << ibs;
956                 openpic_set_irq(opp, idx, 1);
957                 break;
958         default:
959                 /* most registers are read-only, thus ignored */
960                 break;
961         }
962
963         return 0;
964 }
965
966 static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
967 {
968         struct openpic *opp = opaque;
969         uint32_t r = 0;
970         int i, srs;
971
972         pr_debug("%s: addr %#llx\n", __func__, addr);
973         if (addr & 0xF)
974                 return -ENXIO;
975
976         srs = addr >> 4;
977
978         switch (addr) {
979         case 0x00:
980         case 0x10:
981         case 0x20:
982         case 0x30:
983         case 0x40:
984         case 0x50:
985         case 0x60:
986         case 0x70:              /* MSIRs */
987                 r = opp->msi[srs].msir;
988                 /* Clear on read */
989                 opp->msi[srs].msir = 0;
990                 openpic_set_irq(opp, opp->irq_msi + srs, 0);
991                 break;
992         case 0x120:             /* MSISR */
993                 for (i = 0; i < MAX_MSI; i++)
994                         r |= (opp->msi[i].msir ? 1 : 0) << i;
995                 break;
996         }
997
998         pr_debug("%s: => 0x%08x\n", __func__, r);
999         *ptr = r;
1000         return 0;
1001 }
1002
1003 static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
1004 {
1005         uint32_t r = 0;
1006
1007         pr_debug("%s: addr %#llx\n", __func__, addr);
1008
1009         /* TODO: EISR/EIMR */
1010
1011         *ptr = r;
1012         return 0;
1013 }
1014
1015 static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
1016 {
1017         pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
1018
1019         /* TODO: EISR/EIMR */
1020         return 0;
1021 }
1022
1023 static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
1024                                       u32 val, int idx)
1025 {
1026         struct openpic *opp = opaque;
1027         struct irq_source *src;
1028         struct irq_dest *dst;
1029         int s_IRQ, n_IRQ;
1030
1031         pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
1032                 addr, val);
1033
1034         if (idx < 0)
1035                 return 0;
1036
1037         if (addr & 0xF)
1038                 return 0;
1039
1040         dst = &opp->dst[idx];
1041         addr &= 0xFF0;
1042         switch (addr) {
1043         case 0x40:              /* IPIDR */
1044         case 0x50:
1045         case 0x60:
1046         case 0x70:
1047                 idx = (addr - 0x40) >> 4;
1048                 /* we use IDE as mask which CPUs to deliver the IPI to still. */
1049                 opp->src[opp->irq_ipi0 + idx].destmask |= val;
1050                 openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
1051                 openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
1052                 break;
1053         case 0x80:              /* CTPR */
1054                 dst->ctpr = val & 0x0000000F;
1055
1056                 pr_debug("%s: set CPU %d ctpr to %d, raised %d servicing %d\n",
1057                         __func__, idx, dst->ctpr, dst->raised.priority,
1058                         dst->servicing.priority);
1059
1060                 if (dst->raised.priority <= dst->ctpr) {
1061                         pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
1062                                 __func__, idx);
1063                         mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1064                 } else if (dst->raised.priority > dst->servicing.priority) {
1065                         pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
1066                                 __func__, idx, dst->raised.next);
1067                         mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1068                 }
1069
1070                 break;
1071         case 0x90:              /* WHOAMI */
1072                 /* Read-only register */
1073                 break;
1074         case 0xA0:              /* IACK */
1075                 /* Read-only register */
1076                 break;
1077         case 0xB0: {            /* EOI */
1078                 int notify_eoi;
1079
1080                 pr_debug("EOI\n");
1081                 s_IRQ = IRQ_get_next(opp, &dst->servicing);
1082
1083                 if (s_IRQ < 0) {
1084                         pr_debug("%s: EOI with no interrupt in service\n",
1085                                 __func__);
1086                         break;
1087                 }
1088
1089                 IRQ_resetbit(&dst->servicing, s_IRQ);
1090                 /* Notify listeners that the IRQ is over */
1091                 notify_eoi = s_IRQ;
1092                 /* Set up next servicing IRQ */
1093                 s_IRQ = IRQ_get_next(opp, &dst->servicing);
1094                 /* Check queued interrupts. */
1095                 n_IRQ = IRQ_get_next(opp, &dst->raised);
1096                 src = &opp->src[n_IRQ];
1097                 if (n_IRQ != -1 &&
1098                     (s_IRQ == -1 ||
1099                      IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
1100                         pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
1101                                 idx, n_IRQ);
1102                         mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
1103                 }
1104
1105                 spin_unlock(&opp->lock);
1106                 kvm_notify_acked_irq(opp->kvm, 0, notify_eoi);
1107                 spin_lock(&opp->lock);
1108
1109                 break;
1110         }
1111         default:
1112                 break;
1113         }
1114
1115         return 0;
1116 }
1117
1118 static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
1119 {
1120         struct openpic *opp = opaque;
1121
1122         return openpic_cpu_write_internal(opp, addr, val,
1123                                          (addr & 0x1f000) >> 12);
1124 }
1125
1126 static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
1127                              int cpu)
1128 {
1129         struct irq_source *src;
1130         int retval, irq;
1131
1132         pr_debug("Lower OpenPIC INT output\n");
1133         mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
1134
1135         irq = IRQ_get_next(opp, &dst->raised);
1136         pr_debug("IACK: irq=%d\n", irq);
1137
1138         if (irq == -1)
1139                 /* No more interrupt pending */
1140                 return opp->spve;
1141
1142         src = &opp->src[irq];
1143         if (!(src->ivpr & IVPR_ACTIVITY_MASK) ||
1144             !(IVPR_PRIORITY(src->ivpr) > dst->ctpr)) {
1145                 pr_err("%s: bad raised IRQ %d ctpr %d ivpr 0x%08x\n",
1146                         __func__, irq, dst->ctpr, src->ivpr);
1147                 openpic_update_irq(opp, irq);
1148                 retval = opp->spve;
1149         } else {
1150                 /* IRQ enter servicing state */
1151                 IRQ_setbit(&dst->servicing, irq);
1152                 retval = IVPR_VECTOR(opp, src->ivpr);
1153         }
1154
1155         if (!src->level) {
1156                 /* edge-sensitive IRQ */
1157                 src->ivpr &= ~IVPR_ACTIVITY_MASK;
1158                 src->pending = 0;
1159                 IRQ_resetbit(&dst->raised, irq);
1160         }
1161
1162         if ((irq >= opp->irq_ipi0) && (irq < (opp->irq_ipi0 + MAX_IPI))) {
1163                 src->destmask &= ~(1 << cpu);
1164                 if (src->destmask && !src->level) {
1165                         /* trigger on CPUs that didn't know about it yet */
1166                         openpic_set_irq(opp, irq, 1);
1167                         openpic_set_irq(opp, irq, 0);
1168                         /* if all CPUs knew about it, set active bit again */
1169                         src->ivpr |= IVPR_ACTIVITY_MASK;
1170                 }
1171         }
1172
1173         return retval;
1174 }
1175
1176 void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
1177 {
1178         struct openpic *opp = vcpu->arch.mpic;
1179         int cpu = vcpu->arch.irq_cpu_id;
1180         unsigned long flags;
1181
1182         spin_lock_irqsave(&opp->lock, flags);
1183
1184         if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
1185                 kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
1186
1187         spin_unlock_irqrestore(&opp->lock, flags);
1188 }
1189
1190 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
1191                                      u32 *ptr, int idx)
1192 {
1193         struct openpic *opp = opaque;
1194         struct irq_dest *dst;
1195         uint32_t retval;
1196
1197         pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
1198         retval = 0xFFFFFFFF;
1199
1200         if (idx < 0)
1201                 goto out;
1202
1203         if (addr & 0xF)
1204                 goto out;
1205
1206         dst = &opp->dst[idx];
1207         addr &= 0xFF0;
1208         switch (addr) {
1209         case 0x80:              /* CTPR */
1210                 retval = dst->ctpr;
1211                 break;
1212         case 0x90:              /* WHOAMI */
1213                 retval = idx;
1214                 break;
1215         case 0xA0:              /* IACK */
1216                 retval = openpic_iack(opp, dst, idx);
1217                 break;
1218         case 0xB0:              /* EOI */
1219                 retval = 0;
1220                 break;
1221         default:
1222                 break;
1223         }
1224         pr_debug("%s: => 0x%08x\n", __func__, retval);
1225
1226 out:
1227         *ptr = retval;
1228         return 0;
1229 }
1230
1231 static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
1232 {
1233         struct openpic *opp = opaque;
1234
1235         return openpic_cpu_read_internal(opp, addr, ptr,
1236                                          (addr & 0x1f000) >> 12);
1237 }
1238
1239 struct mem_reg {
1240         int (*read)(void *opaque, gpa_t addr, u32 *ptr);
1241         int (*write)(void *opaque, gpa_t addr, u32 val);
1242         gpa_t start_addr;
1243         int size;
1244 };
1245
1246 static const struct mem_reg openpic_gbl_mmio = {
1247         .write = openpic_gbl_write,
1248         .read = openpic_gbl_read,
1249         .start_addr = OPENPIC_GLB_REG_START,
1250         .size = OPENPIC_GLB_REG_SIZE,
1251 };
1252
1253 static const struct mem_reg openpic_tmr_mmio = {
1254         .write = openpic_tmr_write,
1255         .read = openpic_tmr_read,
1256         .start_addr = OPENPIC_TMR_REG_START,
1257         .size = OPENPIC_TMR_REG_SIZE,
1258 };
1259
1260 static const struct mem_reg openpic_cpu_mmio = {
1261         .write = openpic_cpu_write,
1262         .read = openpic_cpu_read,
1263         .start_addr = OPENPIC_CPU_REG_START,
1264         .size = OPENPIC_CPU_REG_SIZE,
1265 };
1266
1267 static const struct mem_reg openpic_src_mmio = {
1268         .write = openpic_src_write,
1269         .read = openpic_src_read,
1270         .start_addr = OPENPIC_SRC_REG_START,
1271         .size = OPENPIC_SRC_REG_SIZE,
1272 };
1273
1274 static const struct mem_reg openpic_msi_mmio = {
1275         .read = openpic_msi_read,
1276         .write = openpic_msi_write,
1277         .start_addr = OPENPIC_MSI_REG_START,
1278         .size = OPENPIC_MSI_REG_SIZE,
1279 };
1280
1281 static const struct mem_reg openpic_summary_mmio = {
1282         .read = openpic_summary_read,
1283         .write = openpic_summary_write,
1284         .start_addr = OPENPIC_SUMMARY_REG_START,
1285         .size = OPENPIC_SUMMARY_REG_SIZE,
1286 };
1287
1288 static void add_mmio_region(struct openpic *opp, const struct mem_reg *mr)
1289 {
1290         if (opp->num_mmio_regions >= MAX_MMIO_REGIONS) {
1291                 WARN(1, "kvm mpic: too many mmio regions\n");
1292                 return;
1293         }
1294
1295         opp->mmio_regions[opp->num_mmio_regions++] = mr;
1296 }
1297
1298 static void fsl_common_init(struct openpic *opp)
1299 {
1300         int i;
1301         int virq = MAX_SRC;
1302
1303         add_mmio_region(opp, &openpic_msi_mmio);
1304         add_mmio_region(opp, &openpic_summary_mmio);
1305
1306         opp->vid = VID_REVISION_1_2;
1307         opp->vir = VIR_GENERIC;
1308         opp->vector_mask = 0xFFFF;
1309         opp->tfrr_reset = 0;
1310         opp->ivpr_reset = IVPR_MASK_MASK;
1311         opp->idr_reset = 1 << 0;
1312         opp->max_irq = MAX_IRQ;
1313
1314         opp->irq_ipi0 = virq;
1315         virq += MAX_IPI;
1316         opp->irq_tim0 = virq;
1317         virq += MAX_TMR;
1318
1319         BUG_ON(virq > MAX_IRQ);
1320
1321         opp->irq_msi = 224;
1322
1323         for (i = 0; i < opp->fsl->max_ext; i++)
1324                 opp->src[i].level = false;
1325
1326         /* Internal interrupts, including message and MSI */
1327         for (i = 16; i < MAX_SRC; i++) {
1328                 opp->src[i].type = IRQ_TYPE_FSLINT;
1329                 opp->src[i].level = true;
1330         }
1331
1332         /* timers and IPIs */
1333         for (i = MAX_SRC; i < virq; i++) {
1334                 opp->src[i].type = IRQ_TYPE_FSLSPECIAL;
1335                 opp->src[i].level = false;
1336         }
1337 }
1338
1339 static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
1340 {
1341         int i;
1342
1343         for (i = 0; i < opp->num_mmio_regions; i++) {
1344                 const struct mem_reg *mr = opp->mmio_regions[i];
1345
1346                 if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1347                         continue;
1348
1349                 return mr->read(opp, addr - mr->start_addr, ptr);
1350         }
1351
1352         return -ENXIO;
1353 }
1354
1355 static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
1356 {
1357         int i;
1358
1359         for (i = 0; i < opp->num_mmio_regions; i++) {
1360                 const struct mem_reg *mr = opp->mmio_regions[i];
1361
1362                 if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
1363                         continue;
1364
1365                 return mr->write(opp, addr - mr->start_addr, val);
1366         }
1367
1368         return -ENXIO;
1369 }
1370
1371 static int kvm_mpic_read(struct kvm_vcpu *vcpu,
1372                          struct kvm_io_device *this,
1373                          gpa_t addr, int len, void *ptr)
1374 {
1375         struct openpic *opp = container_of(this, struct openpic, mmio);
1376         int ret;
1377         union {
1378                 u32 val;
1379                 u8 bytes[4];
1380         } u;
1381
1382         if (addr & (len - 1)) {
1383                 pr_debug("%s: bad alignment %llx/%d\n",
1384                          __func__, addr, len);
1385                 return -EINVAL;
1386         }
1387
1388         spin_lock_irq(&opp->lock);
1389         ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
1390         spin_unlock_irq(&opp->lock);
1391
1392         /*
1393          * Technically only 32-bit accesses are allowed, but be nice to
1394          * people dumping registers a byte at a time -- it works in real
1395          * hardware (reads only, not writes).
1396          */
1397         if (len == 4) {
1398                 *(u32 *)ptr = u.val;
1399                 pr_debug("%s: addr %llx ret %d len 4 val %x\n",
1400                          __func__, addr, ret, u.val);
1401         } else if (len == 1) {
1402                 *(u8 *)ptr = u.bytes[addr & 3];
1403                 pr_debug("%s: addr %llx ret %d len 1 val %x\n",
1404                          __func__, addr, ret, u.bytes[addr & 3]);
1405         } else {
1406                 pr_debug("%s: bad length %d\n", __func__, len);
1407                 return -EINVAL;
1408         }
1409
1410         return ret;
1411 }
1412
1413 static int kvm_mpic_write(struct kvm_vcpu *vcpu,
1414                           struct kvm_io_device *this,
1415                           gpa_t addr, int len, const void *ptr)
1416 {
1417         struct openpic *opp = container_of(this, struct openpic, mmio);
1418         int ret;
1419
1420         if (len != 4) {
1421                 pr_debug("%s: bad length %d\n", __func__, len);
1422                 return -EOPNOTSUPP;
1423         }
1424         if (addr & 3) {
1425                 pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
1426                 return -EOPNOTSUPP;
1427         }
1428
1429         spin_lock_irq(&opp->lock);
1430         ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
1431                                       *(const u32 *)ptr);
1432         spin_unlock_irq(&opp->lock);
1433
1434         pr_debug("%s: addr %llx ret %d val %x\n",
1435                  __func__, addr, ret, *(const u32 *)ptr);
1436
1437         return ret;
1438 }
1439
1440 static const struct kvm_io_device_ops mpic_mmio_ops = {
1441         .read = kvm_mpic_read,
1442         .write = kvm_mpic_write,
1443 };
1444
1445 static void map_mmio(struct openpic *opp)
1446 {
1447         kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
1448
1449         kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
1450                                 opp->reg_base, OPENPIC_REG_SIZE,
1451                                 &opp->mmio);
1452 }
1453
1454 static void unmap_mmio(struct openpic *opp)
1455 {
1456         kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
1457 }
1458
1459 static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
1460 {
1461         u64 base;
1462
1463         if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
1464                 return -EFAULT;
1465
1466         if (base & 0x3ffff) {
1467                 pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
1468                          __func__, base);
1469                 return -EINVAL;
1470         }
1471
1472         if (base == opp->reg_base)
1473                 return 0;
1474
1475         mutex_lock(&opp->kvm->slots_lock);
1476
1477         unmap_mmio(opp);
1478         opp->reg_base = base;
1479
1480         pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
1481                  __func__, base);
1482
1483         if (base == 0)
1484                 goto out;
1485
1486         map_mmio(opp);
1487
1488 out:
1489         mutex_unlock(&opp->kvm->slots_lock);
1490         return 0;
1491 }
1492
1493 #define ATTR_SET                0
1494 #define ATTR_GET                1
1495
1496 static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
1497 {
1498         int ret;
1499
1500         if (addr & 3)
1501                 return -ENXIO;
1502
1503         spin_lock_irq(&opp->lock);
1504
1505         if (type == ATTR_SET)
1506                 ret = kvm_mpic_write_internal(opp, addr, *val);
1507         else
1508                 ret = kvm_mpic_read_internal(opp, addr, val);
1509
1510         spin_unlock_irq(&opp->lock);
1511
1512         pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
1513
1514         return ret;
1515 }
1516
1517 static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1518 {
1519         struct openpic *opp = dev->private;
1520         u32 attr32;
1521
1522         switch (attr->group) {
1523         case KVM_DEV_MPIC_GRP_MISC:
1524                 switch (attr->attr) {
1525                 case KVM_DEV_MPIC_BASE_ADDR:
1526                         return set_base_addr(opp, attr);
1527                 }
1528
1529                 break;
1530
1531         case KVM_DEV_MPIC_GRP_REGISTER:
1532                 if (get_user(attr32, (u32 __user *)(long)attr->addr))
1533                         return -EFAULT;
1534
1535                 return access_reg(opp, attr->attr, &attr32, ATTR_SET);
1536
1537         case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1538                 if (attr->attr > MAX_SRC)
1539                         return -EINVAL;
1540
1541                 if (get_user(attr32, (u32 __user *)(long)attr->addr))
1542                         return -EFAULT;
1543
1544                 if (attr32 != 0 && attr32 != 1)
1545                         return -EINVAL;
1546
1547                 spin_lock_irq(&opp->lock);
1548                 openpic_set_irq(opp, attr->attr, attr32);
1549                 spin_unlock_irq(&opp->lock);
1550                 return 0;
1551         }
1552
1553         return -ENXIO;
1554 }
1555
1556 static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1557 {
1558         struct openpic *opp = dev->private;
1559         u64 attr64;
1560         u32 attr32;
1561         int ret;
1562
1563         switch (attr->group) {
1564         case KVM_DEV_MPIC_GRP_MISC:
1565                 switch (attr->attr) {
1566                 case KVM_DEV_MPIC_BASE_ADDR:
1567                         mutex_lock(&opp->kvm->slots_lock);
1568                         attr64 = opp->reg_base;
1569                         mutex_unlock(&opp->kvm->slots_lock);
1570
1571                         if (copy_to_user((u64 __user *)(long)attr->addr,
1572                                          &attr64, sizeof(u64)))
1573                                 return -EFAULT;
1574
1575                         return 0;
1576                 }
1577
1578                 break;
1579
1580         case KVM_DEV_MPIC_GRP_REGISTER:
1581                 ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
1582                 if (ret)
1583                         return ret;
1584
1585                 if (put_user(attr32, (u32 __user *)(long)attr->addr))
1586                         return -EFAULT;
1587
1588                 return 0;
1589
1590         case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1591                 if (attr->attr > MAX_SRC)
1592                         return -EINVAL;
1593
1594                 spin_lock_irq(&opp->lock);
1595                 attr32 = opp->src[attr->attr].pending;
1596                 spin_unlock_irq(&opp->lock);
1597
1598                 if (put_user(attr32, (u32 __user *)(long)attr->addr))
1599                         return -EFAULT;
1600
1601                 return 0;
1602         }
1603
1604         return -ENXIO;
1605 }
1606
1607 static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1608 {
1609         switch (attr->group) {
1610         case KVM_DEV_MPIC_GRP_MISC:
1611                 switch (attr->attr) {
1612                 case KVM_DEV_MPIC_BASE_ADDR:
1613                         return 0;
1614                 }
1615
1616                 break;
1617
1618         case KVM_DEV_MPIC_GRP_REGISTER:
1619                 return 0;
1620
1621         case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
1622                 if (attr->attr > MAX_SRC)
1623                         break;
1624
1625                 return 0;
1626         }
1627
1628         return -ENXIO;
1629 }
1630
1631 static void mpic_destroy(struct kvm_device *dev)
1632 {
1633         struct openpic *opp = dev->private;
1634
1635         dev->kvm->arch.mpic = NULL;
1636         kfree(opp);
1637         kfree(dev);
1638 }
1639
1640 static int mpic_set_default_irq_routing(struct openpic *opp)
1641 {
1642         struct kvm_irq_routing_entry *routing;
1643
1644         /* Create a nop default map, so that dereferencing it still works */
1645         routing = kzalloc((sizeof(*routing)), GFP_KERNEL);
1646         if (!routing)
1647                 return -ENOMEM;
1648
1649         kvm_set_irq_routing(opp->kvm, routing, 0, 0);
1650
1651         kfree(routing);
1652         return 0;
1653 }
1654
1655 static int mpic_create(struct kvm_device *dev, u32 type)
1656 {
1657         struct openpic *opp;
1658         int ret;
1659
1660         /* We only support one MPIC at a time for now */
1661         if (dev->kvm->arch.mpic)
1662                 return -EINVAL;
1663
1664         opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
1665         if (!opp)
1666                 return -ENOMEM;
1667
1668         dev->private = opp;
1669         opp->kvm = dev->kvm;
1670         opp->dev = dev;
1671         opp->model = type;
1672         spin_lock_init(&opp->lock);
1673
1674         add_mmio_region(opp, &openpic_gbl_mmio);
1675         add_mmio_region(opp, &openpic_tmr_mmio);
1676         add_mmio_region(opp, &openpic_src_mmio);
1677         add_mmio_region(opp, &openpic_cpu_mmio);
1678
1679         switch (opp->model) {
1680         case KVM_DEV_TYPE_FSL_MPIC_20:
1681                 opp->fsl = &fsl_mpic_20;
1682                 opp->brr1 = 0x00400200;
1683                 opp->flags |= OPENPIC_FLAG_IDR_CRIT;
1684                 opp->nb_irqs = 80;
1685                 opp->mpic_mode_mask = GCR_MODE_MIXED;
1686
1687                 fsl_common_init(opp);
1688
1689                 break;
1690
1691         case KVM_DEV_TYPE_FSL_MPIC_42:
1692                 opp->fsl = &fsl_mpic_42;
1693                 opp->brr1 = 0x00400402;
1694                 opp->flags |= OPENPIC_FLAG_ILR;
1695                 opp->nb_irqs = 196;
1696                 opp->mpic_mode_mask = GCR_MODE_PROXY;
1697
1698                 fsl_common_init(opp);
1699
1700                 break;
1701
1702         default:
1703                 ret = -ENODEV;
1704                 goto err;
1705         }
1706
1707         ret = mpic_set_default_irq_routing(opp);
1708         if (ret)
1709                 goto err;
1710
1711         openpic_reset(opp);
1712
1713         smp_wmb();
1714         dev->kvm->arch.mpic = opp;
1715
1716         return 0;
1717
1718 err:
1719         kfree(opp);
1720         return ret;
1721 }
1722
1723 struct kvm_device_ops kvm_mpic_ops = {
1724         .name = "kvm-mpic",
1725         .create = mpic_create,
1726         .destroy = mpic_destroy,
1727         .set_attr = mpic_set_attr,
1728         .get_attr = mpic_get_attr,
1729         .has_attr = mpic_has_attr,
1730 };
1731
1732 int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
1733                              u32 cpu)
1734 {
1735         struct openpic *opp = dev->private;
1736         int ret = 0;
1737
1738         if (dev->ops != &kvm_mpic_ops)
1739                 return -EPERM;
1740         if (opp->kvm != vcpu->kvm)
1741                 return -EPERM;
1742         if (cpu < 0 || cpu >= MAX_CPU)
1743                 return -EPERM;
1744
1745         spin_lock_irq(&opp->lock);
1746
1747         if (opp->dst[cpu].vcpu) {
1748                 ret = -EEXIST;
1749                 goto out;
1750         }
1751         if (vcpu->arch.irq_type) {
1752                 ret = -EBUSY;
1753                 goto out;
1754         }
1755
1756         opp->dst[cpu].vcpu = vcpu;
1757         opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
1758
1759         vcpu->arch.mpic = opp;
1760         vcpu->arch.irq_cpu_id = cpu;
1761         vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
1762
1763         /* This might need to be changed if GCR gets extended */
1764         if (opp->mpic_mode_mask == GCR_MODE_PROXY)
1765                 vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
1766
1767 out:
1768         spin_unlock_irq(&opp->lock);
1769         return ret;
1770 }
1771
1772 /*
1773  * This should only happen immediately before the mpic is destroyed,
1774  * so we shouldn't need to worry about anything still trying to
1775  * access the vcpu pointer.
1776  */
1777 void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
1778 {
1779         BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
1780
1781         opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
1782 }
1783
1784 /*
1785  * Return value:
1786  *  < 0   Interrupt was ignored (masked or not delivered for other reasons)
1787  *  = 0   Interrupt was coalesced (previous irq is still pending)
1788  *  > 0   Number of CPUs interrupt was delivered to
1789  */
1790 static int mpic_set_irq(struct kvm_kernel_irq_routing_entry *e,
1791                         struct kvm *kvm, int irq_source_id, int level,
1792                         bool line_status)
1793 {
1794         u32 irq = e->irqchip.pin;
1795         struct openpic *opp = kvm->arch.mpic;
1796         unsigned long flags;
1797
1798         spin_lock_irqsave(&opp->lock, flags);
1799         openpic_set_irq(opp, irq, level);
1800         spin_unlock_irqrestore(&opp->lock, flags);
1801
1802         /* All code paths we care about don't check for the return value */
1803         return 0;
1804 }
1805
1806 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
1807                 struct kvm *kvm, int irq_source_id, int level, bool line_status)
1808 {
1809         struct openpic *opp = kvm->arch.mpic;
1810         unsigned long flags;
1811
1812         spin_lock_irqsave(&opp->lock, flags);
1813
1814         /*
1815          * XXX We ignore the target address for now, as we only support
1816          *     a single MSI bank.
1817          */
1818         openpic_msi_write(kvm->arch.mpic, MSIIR_OFFSET, e->msi.data);
1819         spin_unlock_irqrestore(&opp->lock, flags);
1820
1821         /* All code paths we care about don't check for the return value */
1822         return 0;
1823 }
1824
1825 int kvm_set_routing_entry(struct kvm *kvm,
1826                           struct kvm_kernel_irq_routing_entry *e,
1827                           const struct kvm_irq_routing_entry *ue)
1828 {
1829         int r = -EINVAL;
1830
1831         switch (ue->type) {
1832         case KVM_IRQ_ROUTING_IRQCHIP:
1833                 e->set = mpic_set_irq;
1834                 e->irqchip.irqchip = ue->u.irqchip.irqchip;
1835                 e->irqchip.pin = ue->u.irqchip.pin;
1836                 if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
1837                         goto out;
1838                 break;
1839         case KVM_IRQ_ROUTING_MSI:
1840                 e->set = kvm_set_msi;
1841                 e->msi.address_lo = ue->u.msi.address_lo;
1842                 e->msi.address_hi = ue->u.msi.address_hi;
1843                 e->msi.data = ue->u.msi.data;
1844                 break;
1845         default:
1846                 goto out;
1847         }
1848
1849         r = 0;
1850 out:
1851         return r;
1852 }