[linux-2.6-microblaze.git] arch/um/kernel/irq.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2017 - Cambridge Greys Ltd
4  * Copyright (C) 2011 - 2014 Cisco Systems Inc
5  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
6  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
7  *      Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
8  */
9
10 #include <linux/cpumask.h>
11 #include <linux/hardirq.h>
12 #include <linux/interrupt.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/module.h>
15 #include <linux/sched.h>
16 #include <linux/seq_file.h>
17 #include <linux/slab.h>
18 #include <as-layout.h>
19 #include <kern_util.h>
20 #include <os.h>
21 #include <irq_user.h>
22 #include <irq_kern.h>
23 #include <linux/time-internal.h>
24
25
26 extern void free_irqs(void);
27
28 /* When epoll triggers we do not know why it did so,
29  * and we can also have different IRQs for read and write.
30  * This is why we keep a small irq_reg array for each fd -
31  * one entry per IRQ type.
32  */
33 struct irq_reg {
34         void *id;
35         int irq;
36         /* it's cheaper to store this than to query it */
37         int events;
38         bool active;
39         bool pending;
40         bool wakeup;
41 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
42         bool pending_on_resume;
43         void (*timetravel_handler)(int, int, void *,
44                                    struct time_travel_event *);
45         struct time_travel_event event;
46 #endif
47 };
48
49 struct irq_entry {
50         struct list_head list;
51         int fd;
52         struct irq_reg reg[NUM_IRQ_TYPES];
53         bool suspended;
54         bool sigio_workaround;
55 };
56
57 static DEFINE_SPINLOCK(irq_lock);
58 static LIST_HEAD(active_fds);
59 static DECLARE_BITMAP(irqs_allocated, NR_IRQS);
60 static bool irqs_suspended;
61
62 static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
63 {
64 /*
65  * irq->active guards against reentry
66  * irq->pending accumulates pending requests
67  * if pending is raised the irq_handler is re-run
68  * until pending is cleared
69  */
70         if (irq->active) {
71                 irq->active = false;
72
73                 do {
74                         irq->pending = false;
75                         do_IRQ(irq->irq, regs);
76                 } while (irq->pending);
77
78                 irq->active = true;
79         } else {
80                 irq->pending = true;
81         }
82 }
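/*
 * Rough illustration of the active/pending protocol above (hypothetical
 * ordering of overlapping wakeups on the same irq_reg): re-entrant calls
 * become extra loop iterations rather than nested do_IRQ() invocations.
 *
 *	irq_io_loop()		active  -> clear active, enter loop, run do_IRQ()
 *	  irq_io_loop()		!active -> set pending, return immediately
 *	  irq_io_loop()		!active -> pending stays set, return
 *	do_IRQ() returns	pending  -> clear it, run do_IRQ() once more
 *	do_IRQ() returns	!pending -> set active again, done
 */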
83
84 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
85 static void irq_event_handler(struct time_travel_event *ev)
86 {
87         struct irq_reg *reg = container_of(ev, struct irq_reg, event);
88
89         /* do nothing if suspended - the event only serves to cause a wakeup */
90         if (irqs_suspended)
91                 return;
92
93         generic_handle_irq(reg->irq);
94 }
95
96 static bool irq_do_timetravel_handler(struct irq_entry *entry,
97                                       enum um_irq_type t)
98 {
99         struct irq_reg *reg = &entry->reg[t];
100
101         if (!reg->timetravel_handler)
102                 return false;
103
104         /* prevent nesting - we'll get it again later when we SIGIO ourselves */
105         if (reg->pending_on_resume)
106                 return true;
107
108         reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);
109
110         if (!reg->event.pending)
111                 return false;
112
113         if (irqs_suspended)
114                 reg->pending_on_resume = true;
115         return true;
116 }
117 #else
118 static bool irq_do_timetravel_handler(struct irq_entry *entry,
119                                       enum um_irq_type t)
120 {
121         return false;
122 }
123 #endif
124
125 static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
126                               struct uml_pt_regs *regs)
127 {
128         struct irq_reg *reg = &entry->reg[t];
129
130         if (!reg->events)
131                 return;
132
133         if (os_epoll_triggered(idx, reg->events) <= 0)
134                 return;
135
136         if (irq_do_timetravel_handler(entry, t))
137                 return;
138
139         if (irqs_suspended)
140                 return;
141
142         irq_io_loop(reg, regs);
143 }
144
145 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
146 {
147         struct irq_entry *irq_entry;
148         int n, i;
149
150         if (irqs_suspended && !um_irq_timetravel_handler_used())
151                 return;
152
153         while (1) {
154                 /* This is now lockless - epoll keeps back-references to the irqs
155                  * which have triggered it so there is no need to walk the irq
156                  * list and lock it every time. We avoid locking by turning off
157                  * IO for a specific fd with os_del_epoll_fd(fd) before we make
158                  * any changes to the actual data structures
159                  */
160                 n = os_waiting_for_events_epoll();
161
162                 if (n <= 0) {
163                         if (n == -EINTR)
164                                 continue;
165                         else
166                                 break;
167                 }
168
169                 for (i = 0; i < n ; i++) {
170                         enum um_irq_type t;
171
172                         irq_entry = os_epoll_get_data_pointer(i);
173
174                         for (t = 0; t < NUM_IRQ_TYPES; t++)
175                                 sigio_reg_handler(i, irq_entry, t, regs);
176                 }
177         }
178
179         if (!irqs_suspended)
180                 free_irqs();
181 }
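/*
 * Background sketch: the "back-reference" mentioned above is epoll's
 * user-data pointer.  The os_* helpers live in arch/um/os-Linux and are
 * not shown here, but conceptually (hedged reconstruction, details may
 * differ) arming an fd stores the struct irq_entry pointer in
 * epoll_event.data.ptr, so a wakeup hands the entry straight back:
 *
 *	struct epoll_event ev = {
 *		.events   = events,
 *		.data.ptr = irq_entry,
 *	};
 *	epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
 *	...
 *	n = epoll_wait(epollfd, evs, ARRAY_SIZE(evs), timeout);
 *	for (i = 0; i < n; i++)
 *		handle(evs[i].data.ptr);	<- what os_epoll_get_data_pointer(i) yields
 */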
182
183 static struct irq_entry *get_irq_entry_by_fd(int fd)
184 {
185         struct irq_entry *walk;
186
187         lockdep_assert_held(&irq_lock);
188
189         list_for_each_entry(walk, &active_fds, list) {
190                 if (walk->fd == fd)
191                         return walk;
192         }
193
194         return NULL;
195 }
196
197 static void free_irq_entry(struct irq_entry *to_free, bool remove)
198 {
199         if (!to_free)
200                 return;
201
202         if (remove)
203                 os_del_epoll_fd(to_free->fd);
204         list_del(&to_free->list);
205         kfree(to_free);
206 }
207
208 static bool update_irq_entry(struct irq_entry *entry)
209 {
210         enum um_irq_type i;
211         int events = 0;
212
213         for (i = 0; i < NUM_IRQ_TYPES; i++)
214                 events |= entry->reg[i].events;
215
216         if (events) {
217                 /* will modify (instead of add) if needed */
218                 os_add_epoll_fd(events, entry->fd, entry);
219                 return true;
220         }
221
222         os_del_epoll_fd(entry->fd);
223         return false;
224 }
225
226 static void update_or_free_irq_entry(struct irq_entry *entry)
227 {
228         if (!update_irq_entry(entry))
229                 free_irq_entry(entry, false);
230 }
231
232 static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id,
233                        void (*timetravel_handler)(int, int, void *,
234                                                   struct time_travel_event *))
235 {
236         struct irq_entry *irq_entry;
237         int err, events = os_event_mask(type);
238         unsigned long flags;
239
240         err = os_set_fd_async(fd);
241         if (err < 0)
242                 goto out;
243
244         spin_lock_irqsave(&irq_lock, flags);
245         irq_entry = get_irq_entry_by_fd(fd);
246         if (irq_entry) {
247                 /* cannot register the same FD twice with the same type */
248                 if (WARN_ON(irq_entry->reg[type].events)) {
249                         err = -EALREADY;
250                         goto out_unlock;
251                 }
252
253                 /* temporarily disable to avoid IRQ-side locking */
254                 os_del_epoll_fd(fd);
255         } else {
256                 irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
257                 if (!irq_entry) {
258                         err = -ENOMEM;
259                         goto out_unlock;
260                 }
261                 irq_entry->fd = fd;
262                 list_add_tail(&irq_entry->list, &active_fds);
263                 maybe_sigio_broken(fd);
264         }
265
266         irq_entry->reg[type].id = dev_id;
267         irq_entry->reg[type].irq = irq;
268         irq_entry->reg[type].active = true;
269         irq_entry->reg[type].events = events;
270
271 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
272         if (um_irq_timetravel_handler_used()) {
273                 irq_entry->reg[type].timetravel_handler = timetravel_handler;
274                 irq_entry->reg[type].event.fn = irq_event_handler;
275         }
276 #endif
277
278         WARN_ON(!update_irq_entry(irq_entry));
279         spin_unlock_irqrestore(&irq_lock, flags);
280
281         return 0;
282 out_unlock:
283         spin_unlock_irqrestore(&irq_lock, flags);
284 out:
285         return err;
286 }
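/*
 * Note on double registration (illustrative; the caller names below are
 * made up): a second activate_fd() on an fd that already has an entry
 * takes the "irq_entry already exists" branch above, so read and write
 * IRQs for the same descriptor share one entry and one epoll
 * registration, e.g.:
 *
 *	um_request_irq(UM_IRQ_ALLOC, fd, IRQ_READ,  rx_intr, 0, "demo-rx", priv);
 *	um_request_irq(UM_IRQ_ALLOC, fd, IRQ_WRITE, tx_intr, 0, "demo-tx", priv);
 *
 * After the second call update_irq_entry() re-arms the fd with both event
 * bits set; registering the same fd twice with the same type is rejected
 * with -EALREADY.
 */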
287
288 /*
289  * Remove the entry or entries for a specific FD, if you
290  * don't want to remove all the possible entries then use
291  * um_free_irq() or deactivate_fd() instead.
292  */
293 void free_irq_by_fd(int fd)
294 {
295         struct irq_entry *to_free;
296         unsigned long flags;
297
298         spin_lock_irqsave(&irq_lock, flags);
299         to_free = get_irq_entry_by_fd(fd);
300         free_irq_entry(to_free, true);
301         spin_unlock_irqrestore(&irq_lock, flags);
302 }
303 EXPORT_SYMBOL(free_irq_by_fd);
304
305 static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
306 {
307         struct irq_entry *entry;
308         unsigned long flags;
309
310         spin_lock_irqsave(&irq_lock, flags);
311         list_for_each_entry(entry, &active_fds, list) {
312                 enum um_irq_type i;
313
314                 for (i = 0; i < NUM_IRQ_TYPES; i++) {
315                         struct irq_reg *reg = &entry->reg[i];
316
317                         if (!reg->events)
318                                 continue;
319                         if (reg->irq != irq)
320                                 continue;
321                         if (reg->id != dev)
322                                 continue;
323
324                         os_del_epoll_fd(entry->fd);
325                         reg->events = 0;
326                         update_or_free_irq_entry(entry);
327                         goto out;
328                 }
329         }
330 out:
331         spin_unlock_irqrestore(&irq_lock, flags);
332 }
333
334 void deactivate_fd(int fd, int irqnum)
335 {
336         struct irq_entry *entry;
337         unsigned long flags;
338         enum um_irq_type i;
339
340         os_del_epoll_fd(fd);
341
342         spin_lock_irqsave(&irq_lock, flags);
343         entry = get_irq_entry_by_fd(fd);
344         if (!entry)
345                 goto out;
346
347         for (i = 0; i < NUM_IRQ_TYPES; i++) {
348                 if (!entry->reg[i].events)
349                         continue;
350                 if (entry->reg[i].irq == irqnum)
351                         entry->reg[i].events = 0;
352         }
353
354         update_or_free_irq_entry(entry);
355 out:
356         spin_unlock_irqrestore(&irq_lock, flags);
357
358         ignore_sigio_fd(fd);
359 }
360 EXPORT_SYMBOL(deactivate_fd);
361
362 /*
363  * Called just before shutdown in order to provide a clean exec
364  * environment in case the system is rebooting.  No locking because
365  * that would cause a pointless shutdown hang if something hadn't
366  * released the lock.
367  */
368 int deactivate_all_fds(void)
369 {
370         struct irq_entry *entry;
371
372         /* Stop IO. The IRQ loop has no lock so this is our
373          * only way of making sure we are safe to dispose
374          * of all IRQ handlers
375          */
376         os_set_ioignore();
377
378         /* we can no longer call kfree() here so just deactivate */
379         list_for_each_entry(entry, &active_fds, list)
380                 os_del_epoll_fd(entry->fd);
381         os_close_epoll_fd();
382         return 0;
383 }
384
385 /*
386  * do_IRQ handles all normal device IRQs (the special
387  * SMP cross-CPU interrupts have their own specific
388  * handlers).
389  */
390 unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
391 {
392         struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
393         irq_enter();
394         generic_handle_irq(irq);
395         irq_exit();
396         set_irq_regs(old_regs);
397         return 1;
398 }
399
400 void um_free_irq(int irq, void *dev)
401 {
402         if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq))
403                 return;
404
405         free_irq_by_irq_and_dev(irq, dev);
406         free_irq(irq, dev);
407         clear_bit(irq, irqs_allocated);
408 }
409 EXPORT_SYMBOL(um_free_irq);
410
411 static int
412 _um_request_irq(int irq, int fd, enum um_irq_type type,
413                 irq_handler_t handler, unsigned long irqflags,
414                 const char *devname, void *dev_id,
415                 void (*timetravel_handler)(int, int, void *,
416                                            struct time_travel_event *))
417 {
418         int err;
419
420         if (irq == UM_IRQ_ALLOC) {
421                 int i;
422
423                 for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
424                         if (!test_and_set_bit(i, irqs_allocated)) {
425                                 irq = i;
426                                 break;
427                         }
428                 }
429         }
430
431         if (irq < 0)
432                 return -ENOSPC;
433
434         if (fd != -1) {
435                 err = activate_fd(irq, fd, type, dev_id, timetravel_handler);
436                 if (err)
437                         goto error;
438         }
439
440         err = request_irq(irq, handler, irqflags, devname, dev_id);
441         if (err < 0)
442                 goto error;
443
444         return irq;
445 error:
446         clear_bit(irq, irqs_allocated);
447         return err;
448 }
449
450 int um_request_irq(int irq, int fd, enum um_irq_type type,
451                    irq_handler_t handler, unsigned long irqflags,
452                    const char *devname, void *dev_id)
453 {
454         return _um_request_irq(irq, fd, type, handler, irqflags,
455                                devname, dev_id, NULL);
456 }
457 EXPORT_SYMBOL(um_request_irq);
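/*
 * Usage sketch (hedged; the "demo" device, handler and fd are invented
 * for illustration and do not exist in the tree):
 *
 *	static irqreturn_t demo_intr(int irq, void *dev_id)
 *	{
 *		struct demo_device *demo = dev_id;
 *
 *		...drain demo->fd until it would block...
 *		return IRQ_HANDLED;
 *	}
 *
 *	irq = um_request_irq(UM_IRQ_ALLOC, demo->fd, IRQ_READ,
 *			     demo_intr, 0, "demo", demo);
 *	if (irq < 0)
 *		return irq;
 *
 * On success the (possibly dynamically allocated) IRQ number is returned;
 * um_free_irq(irq, demo) later releases both the IRQ and the fd binding.
 */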
458
459 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
460 int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
461                       irq_handler_t handler, unsigned long irqflags,
462                       const char *devname, void *dev_id,
463                       void (*timetravel_handler)(int, int, void *,
464                                                  struct time_travel_event *))
465 {
466         return _um_request_irq(irq, fd, type, handler, irqflags,
467                                devname, dev_id, timetravel_handler);
468 }
469 EXPORT_SYMBOL(um_request_irq_tt);
470 #endif
471
472 #ifdef CONFIG_PM_SLEEP
473 void um_irqs_suspend(void)
474 {
475         struct irq_entry *entry;
476         unsigned long flags;
477
478         irqs_suspended = true;
479
480         spin_lock_irqsave(&irq_lock, flags);
481         list_for_each_entry(entry, &active_fds, list) {
482                 enum um_irq_type t;
483                 bool clear = true;
484
485                 for (t = 0; t < NUM_IRQ_TYPES; t++) {
486                         if (!entry->reg[t].events)
487                                 continue;
488
489                         /*
490                          * For the SIGIO_WRITE_IRQ, which is used to handle the
491                          * SIGIO workaround thread, we need special handling:
492                          * enable wake for the IRQ itself, but below we tell it about
493                          * any FDs that should be suspended.
494                          */
495                         if (entry->reg[t].wakeup ||
496                             entry->reg[t].irq == SIGIO_WRITE_IRQ
497 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
498                             || entry->reg[t].timetravel_handler
499 #endif
500                             ) {
501                                 clear = false;
502                                 break;
503                         }
504                 }
505
506                 if (clear) {
507                         entry->suspended = true;
508                         os_clear_fd_async(entry->fd);
509                         entry->sigio_workaround =
510                                 !__ignore_sigio_fd(entry->fd);
511                 }
512         }
513         spin_unlock_irqrestore(&irq_lock, flags);
514 }
515
516 void um_irqs_resume(void)
517 {
518         struct irq_entry *entry;
519         unsigned long flags;
520
521
522         local_irq_save(flags);
523 #ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
524         /*
525          * We don't need to lock anything here since we're in resume
526          * and nothing else is running, but IRQs are disabled so that
527          * nothing touches the interrupt list from interrupt context.
528          */
529         list_for_each_entry(entry, &active_fds, list) {
530                 enum um_irq_type t;
531
532                 for (t = 0; t < NUM_IRQ_TYPES; t++) {
533                         struct irq_reg *reg = &entry->reg[t];
534
535                         if (reg->pending_on_resume) {
536                                 irq_enter();
537                                 generic_handle_irq(reg->irq);
538                                 irq_exit();
539                                 reg->pending_on_resume = false;
540                         }
541                 }
542         }
543 #endif
544
545         spin_lock(&irq_lock);
546         list_for_each_entry(entry, &active_fds, list) {
547                 if (entry->suspended) {
548                         int err = os_set_fd_async(entry->fd);
549
550                         WARN(err < 0, "os_set_fd_async returned %d\n", err);
551                         entry->suspended = false;
552
553                         if (entry->sigio_workaround) {
554                                 err = __add_sigio_fd(entry->fd);
555                                 WARN(err < 0, "__add_sigio_fd returned %d\n", err);
556                         }
557                 }
558         }
559         spin_unlock_irqrestore(&irq_lock, flags);
560
561         irqs_suspended = false;
562         send_sigio_to_self();
563 }
564
565 static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
566 {
567         struct irq_entry *entry;
568         unsigned long flags;
569
570         spin_lock_irqsave(&irq_lock, flags);
571         list_for_each_entry(entry, &active_fds, list) {
572                 enum um_irq_type t;
573
574                 for (t = 0; t < NUM_IRQ_TYPES; t++) {
575                         if (!entry->reg[t].events)
576                                 continue;
577
578                         if (entry->reg[t].irq != d->irq)
579                                 continue;
580                         entry->reg[t].wakeup = on;
581                         goto unlock;
582                 }
583         }
584 unlock:
585         spin_unlock_irqrestore(&irq_lock, flags);
586         return 0;
587 }
588 #else
589 #define normal_irq_set_wake NULL
590 #endif
591
592 /*
593  * irq_chip must define at least enable/disable and ack when
594  * the edge handler is used.
595  */
596 static void dummy(struct irq_data *d)
597 {
598 }
599
600 /* This is used for everything other than the timer. */
601 static struct irq_chip normal_irq_type = {
602         .name = "SIGIO",
603         .irq_disable = dummy,
604         .irq_enable = dummy,
605         .irq_ack = dummy,
606         .irq_mask = dummy,
607         .irq_unmask = dummy,
608         .irq_set_wake = normal_irq_set_wake,
609 };
610
611 static struct irq_chip alarm_irq_type = {
612         .name = "SIGALRM",
613         .irq_disable = dummy,
614         .irq_enable = dummy,
615         .irq_ack = dummy,
616         .irq_mask = dummy,
617         .irq_unmask = dummy,
618 };
619
620 void __init init_IRQ(void)
621 {
622         int i;
623
624         irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
625
626         for (i = 1; i < NR_IRQS; i++)
627                 irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
628         /* Initialize EPOLL Loop */
629         os_setup_epoll();
630 }
631
632 /*
633  * IRQ stack entry and exit:
634  *
635  * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
636  * and switch over to the IRQ stack after some preparation.  We use
637  * sigaltstack to receive signals on a separate stack from the start.
638  * These two functions make sure the rest of the kernel won't be too
639  * upset by being on a different stack.  The IRQ stack has a
640  * thread_info structure at the bottom so that current et al continue
641  * to work.
642  *
643  * to_irq_stack copies the current task's thread_info to the IRQ stack
644  * thread_info and sets the task's stack to point to the IRQ stack.
645  *
646  * from_irq_stack copies the thread_info struct back (flags may have
647  * been modified) and resets the task's stack pointer.
648  *
649  * Tricky bits -
650  *
651  * What happens when two signals race each other?  UML doesn't block
652  * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
653  * could arrive while a previous one is still setting up the
654  * thread_info.
655  *
656  * There are three cases -
657  *     The first interrupt on the stack - sets up the thread_info and
658  * handles the interrupt
659  *     A nested interrupt interrupting the copying of the thread_info -
660  * can't handle the interrupt, as the stack is in an unknown state
661  *     A nested interrupt not interrupting the copying of the
662  * thread_info - doesn't do any setup, just handles the interrupt
663  *
664  * The first job is to figure out whether we interrupted stack setup.
665  * This is done by xchging the signal mask with the global pending_mask.
666  * If the value that comes back is zero, then there is no setup in
667  * progress, and the interrupt can be handled.  If the value is
668  * non-zero, then there is stack setup in progress.  In order to have
669  * the interrupt handled, we leave our signal in the mask, and it will
670  * be handled by the upper handler after it has set up the stack.
671  *
672  * Next is to figure out whether we are the outer handler or a nested
673  * one.  As part of setting up the stack, thread_info->real_thread is
674  * set to non-NULL (and is reset to NULL on exit).  This is the
675  * nesting indicator.  If it is non-NULL, then the stack is already
676  * set up and the handler can run.
677  */
678  */
679 static unsigned long pending_mask;
680
681 unsigned long to_irq_stack(unsigned long *mask_out)
682 {
683         struct thread_info *ti;
684         unsigned long mask, old;
685         int nested;
686
687         mask = xchg(&pending_mask, *mask_out);
688         if (mask != 0) {
689                 /*
690                  * If any interrupts come in at this point, we want to
691                  * make sure that their bits aren't lost by our
692                  * putting our bit in.  So, this loop accumulates bits
693                  * until xchg returns the same value that we put in.
694                  * When that happens, there were no new interrupts,
695                  * and pending_mask contains a bit for each interrupt
696                  * that came in.
697                  */
698                 old = *mask_out;
699                 do {
700                         old |= mask;
701                         mask = xchg(&pending_mask, old);
702                 } while (mask != old);
703                 return 1;
704         }
705
706         ti = current_thread_info();
707         nested = (ti->real_thread != NULL);
708         if (!nested) {
709                 struct task_struct *task;
710                 struct thread_info *tti;
711
712                 task = cpu_tasks[ti->cpu].task;
713                 tti = task_thread_info(task);
714
715                 *ti = *tti;
716                 ti->real_thread = tti;
717                 task->stack = ti;
718         }
719
720         mask = xchg(&pending_mask, 0);
721         *mask_out |= mask | nested;
722         return 0;
723 }
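/*
 * Worked example of the pending_mask protocol above (hedged; the signal
 * identities are arbitrary): signal A arrives first, signal B arrives
 * while A is still copying the thread_info.
 *
 *	A: xchg(&pending_mask, maskA) -> 0, so A owns the stack setup
 *	B: xchg(&pending_mask, maskB) -> maskA (non-zero), so B folds its
 *	   bit back in, leaving pending_mask == maskA | maskB, and returns 1
 *	   without handling anything
 *	A: finishes setup, xchg(&pending_mask, 0) -> maskA | maskB, ORs it
 *	   into *mask_out and returns 0, so A's caller handles both signals
 */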
724
725 unsigned long from_irq_stack(int nested)
726 {
727         struct thread_info *ti, *to;
728         unsigned long mask;
729
730         ti = current_thread_info();
731
732         pending_mask = 1;
733
734         to = ti->real_thread;
735         current->stack = to;
736         ti->real_thread = NULL;
737         *to = *ti;
738
739         mask = xchg(&pending_mask, 0);
740         return mask & ~1;
741 }
742