/*
 * PowerNV OPAL high level interfaces
 *
 * Copyright 2011 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#undef DEBUG

#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/kobject.h>
#include <linux/delay.h>
#include <linux/memblock.h>
#include <asm/opal.h>
#include <asm/firmware.h>
#include <asm/mce.h>

#include "powernv.h"

/* /sys/firmware/opal */
struct kobject *opal_kobj;

struct opal {
        u64 base;
        u64 entry;
        u64 size;
} opal;

struct mcheck_recoverable_range {
        u64 start_addr;
        u64 end_addr;
        u64 recover_addr;
};

static struct mcheck_recoverable_range *mc_recoverable_range;
static int mc_recoverable_range_len;

static struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
static DEFINE_SPINLOCK(opal_notifier_lock);
static uint64_t last_notified_mask = 0x0ul;
static atomic_t opal_notifier_hold = ATOMIC_INIT(0);

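/*
 * Flat device-tree scan callback: locate the "ibm,opal" node, record
 * the OPAL base/entry/size values and set the firmware feature bits
 * matching the advertised OPAL ABI version.
 */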
int __init early_init_dt_scan_opal(unsigned long node,
                                   const char *uname, int depth, void *data)
{
        const void *basep, *entryp, *sizep;
        unsigned long basesz, entrysz, runtimesz;

        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
                return 0;

        basep  = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
        entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
        sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);

        if (!basep || !entryp || !sizep)
                return 1;

        opal.base = of_read_number(basep, basesz/4);
        opal.entry = of_read_number(entryp, entrysz/4);
        opal.size = of_read_number(sizep, runtimesz/4);

        pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%ld)\n",
                 opal.base, basep, basesz);
        pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%ld)\n",
                 opal.entry, entryp, entrysz);
        pr_debug("OPAL Size  = 0x%llx (sizep=%p runtimesz=%ld)\n",
                 opal.size, sizep, runtimesz);

        powerpc_firmware_features |= FW_FEATURE_OPAL;
        if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
                powerpc_firmware_features |= FW_FEATURE_OPALv2;
                powerpc_firmware_features |= FW_FEATURE_OPALv3;
                pr_info("OPAL V3 detected!\n");
        } else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
                powerpc_firmware_features |= FW_FEATURE_OPALv2;
                pr_info("OPAL V2 detected!\n");
        } else {
                pr_info("OPAL V1 detected!\n");
        }

        return 1;
}

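/*
 * Flat device-tree scan callback: parse the "mcheck-recoverable-ranges"
 * property of the "ibm,opal" node into the mc_recoverable_range table,
 * which the machine check code consults in real mode.
 */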
int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
                                   const char *uname, int depth, void *data)
{
        unsigned long i, size;
        const __be32 *prop;

        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
                return 0;

        prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &size);

        if (!prop)
                return 1;

        pr_debug("Found machine check recoverable ranges.\n");

        /*
         * Allocate a buffer to hold the MC recoverable ranges. We will be
         * accessing them in real mode, hence the buffer needs to be within
         * the RMO region.
         */
        mc_recoverable_range = __va(memblock_alloc_base(size, __alignof__(u64),
                                                        ppc64_rma_size));
        memset(mc_recoverable_range, 0, size);

        /*
         * Each recoverable range entry is a (start address, length,
         * recovery address) triplet of 2, 1 and 2 cells respectively,
         * totalling 5 cells per entry.
         */
        for (i = 0; i < size / (sizeof(*prop) * 5); i++) {
                mc_recoverable_range[i].start_addr =
                                        of_read_number(prop + (i * 5) + 0, 2);
                mc_recoverable_range[i].end_addr =
                                        mc_recoverable_range[i].start_addr +
                                        of_read_number(prop + (i * 5) + 2, 1);
                mc_recoverable_range[i].recover_addr =
                                        of_read_number(prop + (i * 5) + 3, 2);

                pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
                                mc_recoverable_range[i].start_addr,
                                mc_recoverable_range[i].end_addr,
                                mc_recoverable_range[i].recover_addr);
        }
        mc_recoverable_range_len = i;
        return 1;
}

static int __init opal_register_exception_handlers(void)
{
#ifdef __BIG_ENDIAN__
        u64 glue;

        if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
                return -ENODEV;

        /* Hook up some exception handlers, excluding machine check. We
         * use the fwnmi area at 0x7000 to provide the glue space to OPAL.
         */
        glue = 0x7000;
        opal_register_exception_handler(OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
                                        0, glue);
        glue += 128;
        opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
#endif

        return 0;
}

early_initcall(opal_register_exception_handlers);

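/*
 * Subscribe a notifier block to the global OPAL event notifier chain.
 */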
int opal_notifier_register(struct notifier_block *nb)
{
        if (!nb) {
                pr_warning("%s: Invalid argument (%p)\n",
                           __func__, nb);
                return -EINVAL;
        }

        atomic_notifier_chain_register(&opal_notifier_head, nb);
        return 0;
}

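/*
 * Invoke the OPAL event notifier chain with the latest event bits,
 * unless notification is currently held off by opal_notifier_disable().
 */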
static void opal_do_notifier(uint64_t events)
{
        unsigned long flags;
        uint64_t changed_mask;

        if (atomic_read(&opal_notifier_hold))
                return;

        spin_lock_irqsave(&opal_notifier_lock, flags);
        changed_mask = last_notified_mask ^ events;
        last_notified_mask = events;
        spin_unlock_irqrestore(&opal_notifier_lock, flags);

        /*
         * Pass both the event bits and the changed bits so the
         * callbacks have enough information to act on.
         */
        atomic_notifier_call_chain(&opal_notifier_head,
                                   events, (void *)changed_mask);
}

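/*
 * Update the cached event mask without running the notifier chain, so
 * that a subsequent opal_do_notifier() sees these bits as changed.
 */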
void opal_notifier_update_evt(uint64_t evt_mask,
                              uint64_t evt_val)
{
        unsigned long flags;

        spin_lock_irqsave(&opal_notifier_lock, flags);
        last_notified_mask &= ~evt_mask;
        last_notified_mask |= evt_val;
        spin_unlock_irqrestore(&opal_notifier_lock, flags);
}

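/*
 * Re-enable event notification and immediately process any events that
 * became pending while notification was held off.
 */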
void opal_notifier_enable(void)
{
        int64_t rc;
        uint64_t evt = 0;

        atomic_set(&opal_notifier_hold, 0);

        /* Process pending events */
        rc = opal_poll_events(&evt);
        if (rc == OPAL_SUCCESS && evt)
                opal_do_notifier(evt);
}

void opal_notifier_disable(void)
{
        atomic_set(&opal_notifier_hold, 1);
}

/*
 * OPAL message notifier, keyed on message type. Allows subscribers to
 * be notified of a specific message type only.
 */
int opal_message_notifier_register(enum OpalMessageType msg_type,
                                        struct notifier_block *nb)
{
        if (!nb) {
                pr_warning("%s: Invalid argument (%p)\n",
                           __func__, nb);
                return -EINVAL;
        }
        if (msg_type >= OPAL_MSG_TYPE_MAX) {
                pr_warning("%s: Invalid message type argument (%d)\n",
                           __func__, msg_type);
                return -EINVAL;
        }
        return atomic_notifier_chain_register(
                                &opal_msg_notifier_head[msg_type], nb);
}

static void opal_message_do_notify(uint32_t msg_type, void *msg)
{
        /* notify subscribers */
        atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
                                        msg_type, msg);
}

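/*
 * Fetch one pending message from OPAL and dispatch it to the notifier
 * chain registered for its message type.
 */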
static void opal_handle_message(void)
{
        s64 ret;
        /*
         * TODO: pre-allocate a message buffer depending on opal-msg-size
         * value in /proc/device-tree.
         */
        static struct opal_msg msg;

        ret = opal_get_msg(__pa(&msg), sizeof(msg));
        /* No opal message pending. */
        if (ret == OPAL_RESOURCE)
                return;

        /* Check for other errors. */
        if (ret) {
                pr_warning("%s: Failed to retrieve opal message, err=%lld\n",
                                __func__, ret);
                return;
        }

        /* Sanity check */
        if (msg.msg_type >= OPAL_MSG_TYPE_MAX) {
                pr_warning("%s: Unknown message type: %u\n",
                                __func__, msg.msg_type);
                return;
        }
        opal_message_do_notify(msg.msg_type, (void *)&msg);
}

static int opal_message_notify(struct notifier_block *nb,
                          unsigned long events, void *change)
{
        if (events & OPAL_EVENT_MSG_PENDING)
                opal_handle_message();
        return 0;
}

static struct notifier_block opal_message_nb = {
        .notifier_call  = opal_message_notify,
};

static int __init opal_message_init(void)
{
        int ret, i;

        for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
                ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);

        ret = opal_notifier_register(&opal_message_nb);
        if (ret) {
                pr_err("%s: Can't register OPAL event notifier (%d)\n",
                       __func__, ret);
                return ret;
        }
        return 0;
}
early_initcall(opal_message_init);

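/*
 * Read up to @count characters from OPAL console @vtermno into @buf.
 * Returns the number of characters read (possibly zero), or -ENODEV
 * if OPAL is not available.
 */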
int opal_get_chars(uint32_t vtermno, char *buf, int count)
{
        s64 rc;
        __be64 evt, len;

        if (!opal.entry)
                return -ENODEV;
        opal_poll_events(&evt);
        if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
                return 0;
        len = cpu_to_be64(count);
        rc = opal_console_read(vtermno, &len, buf);
        if (rc == OPAL_SUCCESS)
                return be64_to_cpu(len);
        return 0;
}

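/*
 * Write @total_len characters from @data to OPAL console @vtermno.
 * Writes are serialized under opal_write_lock and, where the firmware
 * lets us check buffer space first, done atomically to avoid mangling
 * hvsi packets.
 */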
int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
{
        int written = 0;
        __be64 olen;
        s64 len, rc;
        unsigned long flags;
        __be64 evt;

        if (!opal.entry)
                return -ENODEV;

        /* We want put_chars to be atomic to avoid mangling of hvsi
         * packets. To do that, we first test for room and return
         * -EAGAIN if there isn't enough.
         *
         * Unfortunately, opal_console_write_buffer_space() doesn't
         * appear to work on OPAL v1, so we just assume there is
         * enough room and are done with it.
         */
        spin_lock_irqsave(&opal_write_lock, flags);
        if (firmware_has_feature(FW_FEATURE_OPALv2)) {
                rc = opal_console_write_buffer_space(vtermno, &olen);
                len = be64_to_cpu(olen);
                if (rc || len < total_len) {
                        spin_unlock_irqrestore(&opal_write_lock, flags);
                        /* Closed -> drop characters */
                        if (rc)
                                return total_len;
                        opal_poll_events(NULL);
                        return -EAGAIN;
                }
        }

        /* We still try to handle partial completions, though they
         * should no longer happen.
         */
        rc = OPAL_BUSY;
        while (total_len > 0 && (rc == OPAL_BUSY ||
                                rc == OPAL_BUSY_EVENT || rc == OPAL_SUCCESS)) {
                olen = cpu_to_be64(total_len);
                rc = opal_console_write(vtermno, &olen, data);
                len = be64_to_cpu(olen);

                /* Closed or other error drop */
                if (rc != OPAL_SUCCESS && rc != OPAL_BUSY &&
                    rc != OPAL_BUSY_EVENT) {
                        written = total_len;
                        break;
                }
                if (rc == OPAL_SUCCESS) {
                        total_len -= len;
                        data += len;
                        written += len;
                }
                /* This is a bit nasty but we need that for the console to
                 * flush when there aren't any interrupts. We will clean
                 * this up later to limit it to synchronous paths such as
                 * the kernel console and xmon/udbg.
                 */
                do {
                        opal_poll_events(&evt);
                } while (rc == OPAL_SUCCESS &&
                        (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT));
        }
        spin_unlock_irqrestore(&opal_write_lock, flags);
        return written;
}

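/*
 * Attempt to recover from a machine check. Returns 1 if the event was
 * handled (the platform recovered, or the offending user task was
 * killed), 0 otherwise.
 */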
static int opal_recover_mce(struct pt_regs *regs,
                                        struct machine_check_event *evt)
{
        int recovered = 0;
        uint64_t ea = get_mce_fault_addr(evt);

        if (!(regs->msr & MSR_RI)) {
                /* If MSR_RI isn't set, we cannot recover */
                recovered = 0;
        } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
                /* Platform corrected itself */
                recovered = 1;
        } else if (ea && !is_kernel_addr(ea)) {
                /*
                 * The faulting address is not in kernel text, so we should
                 * be fine. We need to find which process uses this address.
                 * For now, kill the task if we took the exception in
                 * userspace.
                 *
                 * TODO: Queue up this address for hwpoisoning later.
                 */
                if (user_mode(regs) && !is_global_init(current)) {
                        _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                        recovered = 1;
                } else {
                        recovered = 0;
                }
        } else if (user_mode(regs) && !is_global_init(current) &&
                evt->severity == MCE_SEV_ERROR_SYNC) {
                /*
                 * If we took a synchronous error in userspace, kill
                 * the task.
                 */
                _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
                recovered = 1;
        }
        return recovered;
}

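/*
 * Machine check handler: fetch the queued MCE event, log it and try to
 * recover. Returns 1 if recovered, 0 otherwise.
 */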
int opal_machine_check(struct pt_regs *regs)
{
        struct machine_check_event evt;

        if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
                return 0;

        /* Print things out */
        if (evt.version != MCE_V1) {
                pr_err("Machine Check Exception, Unknown event version %d!\n",
                       evt.version);
                return 0;
        }
        machine_check_print_event_info(&evt);

        if (opal_recover_mce(regs, &evt))
                return 1;
        return 0;
}

static uint64_t find_recovery_address(uint64_t nip)
{
        int i;

        for (i = 0; i < mc_recoverable_range_len; i++)
                if ((nip >= mc_recoverable_range[i].start_addr) &&
                    (nip < mc_recoverable_range[i].end_addr))
                        return mc_recoverable_range[i].recover_addr;
        return 0;
}

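/*
 * Check whether a machine check hit inside OPAL firmware and, if the
 * address lies in a recoverable range, redirect regs->nip to the
 * registered fixup address. Returns true if a fixup was applied.
 */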
bool opal_mce_check_early_recovery(struct pt_regs *regs)
{
        uint64_t recover_addr = 0;

        if (!opal.base || !opal.size)
                goto out;

        if ((regs->nip >= opal.base) &&
                        (regs->nip <= (opal.base + opal.size)))
                recover_addr = find_recovery_address(regs->nip);

        /*
         * Set up regs->nip so that we rfi into the fixup address.
         */
        if (recover_addr)
                regs->nip = recover_addr;

out:
        return !!recover_addr;
}

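/*
 * Handler for the interrupts listed in the "opal-interrupts" property:
 * let OPAL process the interrupt and propagate any resulting events.
 */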
static irqreturn_t opal_interrupt(int irq, void *data)
{
        __be64 events;

        opal_handle_interrupt(virq_to_hw(irq), &events);

        /* The event mask comes back big-endian; convert before use. */
        opal_do_notifier(be64_to_cpu(events));

        return IRQ_HANDLED;
}

static int opal_sysfs_init(void)
{
        opal_kobj = kobject_create_and_add("opal", firmware_kobj);
        if (!opal_kobj) {
                pr_warn("kobject_create_and_add opal failed\n");
                return -ENOMEM;
        }

        return 0;
}

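/*
 * Platform setup: create the OPAL console devices, request the
 * firmware interrupts and register the sysfs interfaces.
 */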
static int __init opal_init(void)
{
        struct device_node *np, *consoles;
        const __be32 *irqs;
        int rc, i, irqlen;

        opal_node = of_find_node_by_path("/ibm,opal");
        if (!opal_node) {
                pr_warn("opal: Node not found\n");
                return -ENODEV;
        }

        /* Register OPAL consoles if any ports */
        if (firmware_has_feature(FW_FEATURE_OPALv2))
                consoles = of_find_node_by_path("/ibm,opal/consoles");
        else
                consoles = of_node_get(opal_node);
        if (consoles) {
                for_each_child_of_node(consoles, np) {
                        if (strcmp(np->name, "serial"))
                                continue;
                        of_platform_device_create(np, NULL, NULL);
                }
                of_node_put(consoles);
        }

        /* Find all OPAL interrupts and request them. Note that irqlen is
         * left untouched when the property is absent, so guard its use.
         */
        irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
        pr_debug("opal: Found %d interrupts reserved for OPAL\n",
                 irqs ? (irqlen / 4) : 0);
        opal_irq_count = irqs ? (irqlen / 4) : 0;
        opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
        for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
                unsigned int hwirq = be32_to_cpup(irqs);
                unsigned int irq = irq_create_mapping(NULL, hwirq);
                if (irq == NO_IRQ) {
                        pr_warning("opal: Failed to map irq 0x%x\n", hwirq);
                        continue;
                }
                rc = request_irq(irq, opal_interrupt, 0, "opal", NULL);
                if (rc)
                        pr_warning("opal: Error %d requesting irq %d (0x%x)\n",
                                   rc, irq, hwirq);
                opal_irqs[i] = irq;
        }

        /* Create "opal" kobject under /sys/firmware */
        rc = opal_sysfs_init();
        if (rc == 0) {
                /* Setup error log interface */
                rc = opal_elog_init();
                /* Setup code update interface */
                opal_flash_init();
                /* Setup platform dump extract interface */
                opal_platform_dump_init();
                /* Setup system parameters interface */
                opal_sys_param_init();
        }

        return 0;
}
subsys_initcall(opal_init);

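/*
 * Shutdown path: free the OPAL interrupts (which also masks them) and
 * sync with the firmware so nothing keeps writing to our memory.
 */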
void opal_shutdown(void)
{
        unsigned int i;
        long rc = OPAL_BUSY;

        /* First free interrupts, which will also mask them */
        for (i = 0; i < opal_irq_count; i++) {
                if (opal_irqs[i])
                        free_irq(opal_irqs[i], NULL);
                opal_irqs[i] = 0;
        }

        /*
         * Then sync with OPAL to ensure that anything that can
         * potentially write to our memory, such as an ongoing dump
         * retrieval, has completed.
         */
        while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                rc = opal_sync_host_reboot();
                /* OPAL_BUSY_EVENT means there are events to process first */
                if (rc == OPAL_BUSY_EVENT)
                        opal_poll_events(NULL);
                else
                        mdelay(10);
        }
}