s390/kernel: expand exception table logic to allow new handling options
author Ilya Leoshkevich <iii@linux.ibm.com>
Tue, 30 Jun 2020 18:52:03 +0000 (20:52 +0200)
committer Heiko Carstens <hca@linux.ibm.com>
Mon, 20 Jul 2020 08:55:50 +0000 (10:55 +0200)
This is a s390 port of commit 548acf19234d ("x86/mm: Expand the
exception table logic to allow new handling options"), which is needed
for implementing BPF_PROBE_MEM on s390.

The new handler field is made 64-bit in order to allow pointing from
dynamically allocated entries to handlers in kernel text. Unlike on x86,
NULL is used instead of ex_handler_default. This is because exception
tables are used by boot/text_dma.S, and it would be a pain to preserve
ex_handler_default.

The new infrastructure is ignored in early_pgm_check_handler, since
there is no pt_regs.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
arch/s390/include/asm/extable.h
arch/s390/include/asm/linkage.h
arch/s390/kernel/kprobes.c
arch/s390/kernel/traps.c
arch/s390/mm/fault.c
scripts/sorttable.c

index ae27f75..3beb294 100644 (file)
@@ -1,12 +1,20 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __S390_EXTABLE_H
 #define __S390_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
 /*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * Each exception table entry consists of three addresses:
+ *
+ * - Address of an instruction that is allowed to fault.
+ * - Address at which the program should continue.
+ * - Optional address of handler that takes pt_regs * argument and runs in
+ *   interrupt context.
+ *
+ * No registers are modified, so it is entirely up to the continuation code
+ * to figure out what to do.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
@@ -17,6 +25,7 @@
 struct exception_table_entry
 {
        int insn, fixup;
+       long handler;   /* self-relative handler offset, 0 = no handler */
 };
 
 extern struct exception_table_entry *__start_dma_ex_table;
@@ -29,6 +38,39 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x)
        return (unsigned long)&x->fixup + x->fixup;
 }
 
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+                            struct pt_regs *);
+
+/*
+ * Resolve the optional handler of exception table entry @x.
+ *
+ * The handler field holds an offset relative to its own address. A stored
+ * value of 0 means the entry has no handler, in which case NULL is
+ * returned; otherwise the offset is turned into a callable pointer.
+ */
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
+{
+       unsigned long field_addr = (unsigned long)&x->handler;
+
+       if (unlikely(x->handler))
+               return (ex_handler_t)(field_addr + x->handler);
+       return NULL;
+}
+
+/*
+ * Apply exception table entry @x to the faulting context @regs.
+ *
+ * Entries without a handler simply redirect the PSW address to the fixup
+ * location and report success. Entries with a handler delegate to it and
+ * pass its return value on to the caller.
+ */
+static inline bool ex_handle(const struct exception_table_entry *x,
+                            struct pt_regs *regs)
+{
+       ex_handler_t fn = ex_fixup_handler(x);
+
+       if (likely(!fn)) {
+               regs->psw.addr = extable_fixup(x);
+               return true;
+       }
+       return fn(x, regs);
+}
+
 #define ARCH_HAS_RELATIVE_EXTABLE
 
+/*
+ * Arch hook used while sorting the relative exception table: fix up the
+ * fixup and handler fields of two entries that are trading places.
+ *
+ * @a, @b:  the two table slots being exchanged
+ * @tmp:    supplies the values that end up in @b (presumably the saved
+ *          original contents of @a - confirm against the caller)
+ * @delta:  correction applied to the relative fields; added for @a,
+ *          subtracted for @b
+ *
+ * A handler value of 0 is special: it means "no handler" (see
+ * ex_fixup_handler()). It must be carried over unchanged, since adjusting
+ * it by @delta would turn it into a bogus non-zero handler offset.
+ */
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+                                      struct exception_table_entry *b,
+                                      struct exception_table_entry tmp,
+                                      int delta)
+{
+       a->fixup = b->fixup + delta;
+       b->fixup = tmp.fixup - delta;
+       a->handler = b->handler;
+       if (a->handler)
+               a->handler += delta;
+       b->handler = tmp.handler;
+       if (b->handler)
+               b->handler -= delta;
+}
+/*
+ * The generic extable sort code (lib/extable.c) falls back to its default
+ * implementation unless a preprocessor symbol named swap_ex_entry_fixup
+ * is defined, so the function above needs this define to take effect.
+ */
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
 #endif
index 1b52c07..a0a7a2c 100644 (file)
 
 #define __EX_TABLE(_section, _fault, _target)                          \
        stringify_in_c(.section _section,"a";)                          \
-       stringify_in_c(.align   4;)                                     \
+       stringify_in_c(.align   8;)                                     \
        stringify_in_c(.long    (_fault) - .;)                          \
        stringify_in_c(.long    (_target) - .;)                         \
+       stringify_in_c(.quad    0;) /* handler: 0 = none */             \
        stringify_in_c(.previous)
 
 #define EX_TABLE(_fault, _target)                                      \
index 548d0ea..d2a71d8 100644 (file)
@@ -523,10 +523,8 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
                 * zero, try to fix up.
                 */
                entry = s390_search_extables(regs->psw.addr);
-               if (entry) {
-                       regs->psw.addr = extable_fixup(entry);
+               if (entry && ex_handle(entry, regs))
                        return 1;
-               }
 
                /*
                 * fixup_exception() could not handle it,
index ff9cc4c..8d1e8a1 100644 (file)
@@ -50,11 +50,8 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
         } else {
                 const struct exception_table_entry *fixup;
                fixup = s390_search_extables(regs->psw.addr);
-                if (fixup)
-                       regs->psw.addr = extable_fixup(fixup);
-               else {
+               if (!fixup || !ex_handle(fixup, regs))
                        die(regs, str);
-               }
         }
 }
 
@@ -251,7 +248,7 @@ void monitor_event_exception(struct pt_regs *regs)
        case BUG_TRAP_TYPE_NONE:
                fixup = s390_search_extables(regs->psw.addr);
                if (fixup)
-                       regs->psw.addr = extable_fixup(fixup);
+                       ex_handle(fixup, regs);
                break;
        case BUG_TRAP_TYPE_WARN:
                break;
index 5988285..aebf918 100644 (file)
@@ -255,10 +255,8 @@ static noinline void do_no_context(struct pt_regs *regs)
 
        /* Are we prepared to handle this kernel fault?  */
        fixup = s390_search_extables(regs->psw.addr);
-       if (fixup) {
-               regs->psw.addr = extable_fixup(fixup);
+       if (fixup && ex_handle(fixup, regs))
                return;
-       }
 
        /*
         * Oops. The kernel tried to access some bad page. We'll have to
index ec6b5e8..0ef3abf 100644 (file)
@@ -255,6 +255,45 @@ static void x86_sort_relative_table(char *extab_image, int image_size)
        }
 }
 
+/*
+ * Sort an s390 exception table image. Entries are 16 bytes: two 32-bit
+ * relative fields (insn at offset 0, fixup at offset 4) followed by a
+ * 64-bit relative handler field at offset 8.
+ *
+ * Each field is first biased by its own offset inside the image, the
+ * entries are then sorted with compare_relative_table(), and finally each
+ * field is un-biased using the offset of the slot it now occupies.
+ */
+static void s390_sort_relative_table(char *extab_image, int image_size)
+{
+       int off;
+
+       for (off = 0; off < image_size; off += 16) {
+               uint32_t *insn = (uint32_t *)(extab_image + off);
+               uint32_t *fixup = (uint32_t *)(extab_image + off + 4);
+               uint64_t *hslot = (uint64_t *)(extab_image + off + 8);
+               uint64_t hval;
+
+               w(r(insn) + off, insn);
+               w(r(fixup) + (off + 4), fixup);
+               /*
+                * A self-relative handler value of 0 is reserved to mean
+                * "ignore the handler". That is safe, because it would
+                * otherwise describe a handler field pointing at itself.
+                * The 0 is kept as-is in extable-relative form as well:
+                * there it would describe a handler located at the first
+                * extable entry, which cannot happen either.
+                */
+               hval = r8(hslot);
+               if (hval != 0)
+                       hval += off + 8;
+               w8(hval, hslot);
+       }
+
+       qsort(extab_image, image_size / 16, 16, compare_relative_table);
+
+       /* Undo the bias, now relative to each entry's new position. */
+       for (off = 0; off < image_size; off += 16) {
+               uint32_t *insn = (uint32_t *)(extab_image + off);
+               uint32_t *fixup = (uint32_t *)(extab_image + off + 4);
+               uint64_t *hslot = (uint64_t *)(extab_image + off + 8);
+               uint64_t hval;
+
+               w(r(insn) - off, insn);
+               w(r(fixup) - (off + 4), fixup);
+               hval = r8(hslot);
+               if (hval != 0)
+                       hval -= off + 8;
+               w8(hval, hslot);
+       }
+}
+
 static int do_file(char const *const fname, void *addr)
 {
        int rc = -1;
@@ -297,6 +336,8 @@ static int do_file(char const *const fname, void *addr)
                custom_sort = x86_sort_relative_table;
                break;
        case EM_S390:
+               custom_sort = s390_sort_relative_table;
+               break;
        case EM_AARCH64:
        case EM_PARISC:
        case EM_PPC: