Merge branch 'for-next/mm' into for-next/core

author Will Deacon <will@kernel.org>

Thu, 24 Jun 2021 13:04:33 +0000 (14:04 +0100)

committer Will Deacon <will@kernel.org>

Thu, 24 Jun 2021 13:04:33 +0000 (14:04 +0100)
author Will Deacon <will@kernel.org>
Thu, 24 Jun 2021 13:04:33 +0000 (14:04 +0100)
committer Will Deacon <will@kernel.org>
Thu, 24 Jun 2021 13:04:33 +0000 (14:04 +0100)
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst

index 18b8cc1..a9192e7 100644 (file)
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -277,6 +277,12 @@ Before jumping into the kernel, the following conditions must be met:
  
      - SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
  
+  For CPUs with support for HCRX_EL2 (FEAT_HCX) present:
+
+  - If EL3 is present and the kernel is entered at EL2:
+
+    - SCR_EL3.HXEn (bit 38) must be initialised to 0b1.
+
    For CPUs with Advanced SIMD and floating point support:
  
    - If EL3 is present:
diff --git a/Makefile b/Makefile

index e446835..e38c74d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1031,7 +1031,7 @@ LDFLAGS_vmlinux   += $(call ld-option, -X,)
  endif
  
  ifeq ($(CONFIG_RELR),y)
-LDFLAGS_vmlinux        += --pack-dyn-relocs=relr
+LDFLAGS_vmlinux        += --pack-dyn-relocs=relr --use-android-relr-tags
  endif
  
  # We never want expected sections to be placed heuristically by the
diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h

index 0d67ed6..dc8f53f 100644 (file)
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -49,4 +49,9 @@ extern int arm_cpuidle_suspend(int index);
  
  extern int arm_cpuidle_init(int cpu);
  
+struct arm_cpuidle_irq_context { };     /* empty: the stubs below save nothing */
+
+#define arm_cpuidle_save_irq_context(c)                ((void)(c))
+#define arm_cpuidle_restore_irq_context(c)     ((void)(c))
+
  #endif
diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h

index 8a078fc..7e157ab 100644 (file)
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -3,12 +3,10 @@
  #define __ASM_ALTERNATIVE_MACROS_H
  
  #include <asm/cpucaps.h>
+#include <asm/insn-def.h>
  
  #define ARM64_CB_PATCH ARM64_NCAPS
  
-/* A64 instructions are always 32 bits. */
-#define        AARCH64_INSN_SIZE               4
-
  #ifndef __ASSEMBLY__
  
  #include <linux/stringify.h>
@@ -197,11 +195,6 @@ alternative_endif
  #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)  \
         alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
  
-.macro user_alt, label, oldinstr, newinstr, cond
-9999:  alternative_insn "\oldinstr", "\newinstr", \cond
-       _asm_extable 9999b, \label
-.endm
-
  #endif  /*  __ASSEMBLY__  */
  
  /*
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h

index 934b9be..4ad22c3 100644 (file)
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -124,7 +124,8 @@ static inline u32 gic_read_rpr(void)
  #define gic_read_lpir(c)               readq_relaxed(c)
  #define gic_write_lpir(v, c)           writeq_relaxed(v, c)
  
-#define gic_flush_dcache_to_poc(a,l)   __flush_dcache_area((a), (l))
+#define gic_flush_dcache_to_poc(a,l)   \
+       dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
  
  #define gits_read_baser(c)             readq_relaxed(c)
  #define gits_write_baser(v, c)         writeq_relaxed(v, c)
diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h

index 1c9a3a0..ec1d965 100644 (file)
--- a/arch/arm64/include/asm/asm-prototypes.h
+++ b/arch/arm64/include/asm/asm-prototypes.h
@@ -23,4 +23,10 @@ long long __ashlti3(long long a, int b);
  long long __ashrti3(long long a, int b);
  long long __lshrti3(long long a, int b);
  
+/*
+ * This function uses a custom calling convention and cannot be called from C so
+ * this prototype is not entirely accurate.
+ */
+void __hwasan_tag_mismatch(unsigned long addr, unsigned long access_info);
+
  #endif /* __ASM_PROTOTYPES_H */
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h

index 8418c1b..89faca0 100644 (file)
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -130,15 +130,27 @@ alternative_endif
         .endm
  
  /*
- * Emit an entry into the exception table
+ * Create an exception table entry for `insn`, which will branch to `fixup`
+ * when an unhandled fault is taken.
   */
-       .macro          _asm_extable, from, to
+       .macro          _asm_extable, insn, fixup
         .pushsection    __ex_table, "a"
         .align          3
-       .long           (\from - .), (\to - .)
+       .long           (\insn - .), (\fixup - .)
         .popsection
         .endm
  
+/*
+ * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
+ * do nothing.
+ */
+       .macro          _cond_extable, insn, fixup
+       .ifnc           \fixup,
+       _asm_extable    \insn, \fixup
+       .endif
+       .endm
+
+
  #define USER(l, x...)                          \
  9999:  x;                                      \
         _asm_extable    9999b, l
@@ -232,15 +244,23 @@ lr        .req    x30             // link register
          * @dst: destination register
          */
  #if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
-       .macro  this_cpu_offset, dst
+       .macro  get_this_cpu_offset, dst
         mrs     \dst, tpidr_el2
         .endm
  #else
-       .macro  this_cpu_offset, dst
+       .macro  get_this_cpu_offset, dst
  alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
         mrs     \dst, tpidr_el1
  alternative_else
         mrs     \dst, tpidr_el2
+alternative_endif
+       .endm
+
+       .macro  set_this_cpu_offset, src
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+       msr     tpidr_el1, \src
+alternative_else
+       msr     tpidr_el2, \src
  alternative_endif
         .endm
  #endif
@@ -253,7 +273,7 @@ alternative_endif
         .macro adr_this_cpu, dst, sym, tmp
         adrp    \tmp, \sym
         add     \dst, \tmp, #:lo12:\sym
-       this_cpu_offset \tmp
+       get_this_cpu_offset \tmp
         add     \dst, \dst, \tmp
         .endm
  
@@ -264,7 +284,7 @@ alternative_endif
          */
         .macro ldr_this_cpu dst, sym, tmp
         adr_l   \dst, \sym
-       this_cpu_offset \tmp
+       get_this_cpu_offset \tmp
         ldr     \dst, [\dst, \tmp]
         .endm
  
@@ -375,51 +395,53 @@ alternative_cb_end
         bfi     \tcr, \tmp0, \pos, #3
         .endm
  
+       .macro __dcache_op_workaround_clean_cache, op, addr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+       dc      \op, \addr
+alternative_else
+       dc      civac, \addr
+alternative_endif
+       .endm
+
  /*
   * Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
+ * [start, end)
   *
   *     op:             operation passed to dc instruction
   *     domain:         domain used in dsb instruction
- *     kaddr:          starting virtual address of the region
- *     size:           size of the region
- *     Corrupts:       kaddr, size, tmp1, tmp2
+ *     start:          starting virtual address of the region
+ *     end:            end virtual address of the region
+ *     fixup:          optional label to branch to on user fault
+ *     Corrupts:       start, end, tmp1, tmp2
   */
-       .macro __dcache_op_workaround_clean_cache, op, kaddr
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
-       dc      \op, \kaddr
-alternative_else
-       dc      civac, \kaddr
-alternative_endif
-       .endm
-
-       .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+       .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
         dcache_line_size \tmp1, \tmp2
-       add     \size, \kaddr, \size
         sub     \tmp2, \tmp1, #1
-       bic     \kaddr, \kaddr, \tmp2
-9998:
+       bic     \start, \start, \tmp2
+.Ldcache_op\@:
         .ifc    \op, cvau
-       __dcache_op_workaround_clean_cache \op, \kaddr
+       __dcache_op_workaround_clean_cache \op, \start
         .else
         .ifc    \op, cvac
-       __dcache_op_workaround_clean_cache \op, \kaddr
+       __dcache_op_workaround_clean_cache \op, \start
         .else
         .ifc    \op, cvap
-       sys     3, c7, c12, 1, \kaddr   // dc cvap
+       sys     3, c7, c12, 1, \start   // dc cvap
         .else
         .ifc    \op, cvadp
-       sys     3, c7, c13, 1, \kaddr   // dc cvadp
+       sys     3, c7, c13, 1, \start   // dc cvadp
         .else
-       dc      \op, \kaddr
+       dc      \op, \start
         .endif
         .endif
         .endif
         .endif
-       add     \kaddr, \kaddr, \tmp1
-       cmp     \kaddr, \size
-       b.lo    9998b
+       add     \start, \start, \tmp1
+       cmp     \start, \end
+       b.lo    .Ldcache_op\@
         dsb     \domain
+
+       _cond_extable .Ldcache_op\@, \fixup
         .endm
  
  /*
@@ -427,20 +449,22 @@ alternative_endif
   * [start, end)
   *
   *     start, end:     virtual addresses describing the region
- *     label:          A label to branch to on user fault.
+ *     fixup:          optional label to branch to on user fault
   *     Corrupts:       tmp1, tmp2
   */
-       .macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+       .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
         icache_line_size \tmp1, \tmp2
         sub     \tmp2, \tmp1, #1
         bic     \tmp2, \start, \tmp2
-9997:
-USER(\label, ic        ivau, \tmp2)                    // invalidate I line PoU
+.Licache_op\@:
+       ic      ivau, \tmp2                     // invalidate I line PoU
         add     \tmp2, \tmp2, \tmp1
         cmp     \tmp2, \end
-       b.lo    9997b
+       b.lo    .Licache_op\@
         dsb     ish
         isb
+
+       _cond_extable .Licache_op\@, \fixup
         .endm
  
  /*
@@ -745,7 +769,7 @@ USER(\label, ic     ivau, \tmp2)                    // invalidate I line PoU
         cbz             \tmp, \lbl
  #endif
         adr_l           \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
-       this_cpu_offset \tmp2
+       get_this_cpu_offset     \tmp2
         ldr             w\tmp, [\tmp, \tmp2]
         cbnz            w\tmp, \lbl     // yield on pending softirq in task context
  .Lnoyield_\@:
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h

index 52e5c16..543c997 100644 (file)
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -30,45 +30,58 @@
   *     the implementation assumes non-aliasing VIPT D-cache and (aliasing)
   *     VIPT I-cache.
   *
- *     flush_icache_range(start, end)
+ *     All functions below apply to the interval [start, end)
+ *             - start  - virtual start address (inclusive)
+ *             - end    - virtual end address (exclusive)
   *
- *             Ensure coherency between the I-cache and the D-cache in the
- *             region described by start, end.
- *             - start  - virtual start address
- *             - end    - virtual end address
+ *     caches_clean_inval_pou(start, end)
   *
- *     invalidate_icache_range(start, end)
+ *             Ensure coherency between the I-cache and the D-cache region to
+ *             the Point of Unification.
   *
- *             Invalidate the I-cache in the region described by start, end.
- *             - start  - virtual start address
- *             - end    - virtual end address
+ *     caches_clean_inval_user_pou(start, end)
   *
- *     __flush_cache_user_range(start, end)
+ *             Ensure coherency between the I-cache and the D-cache region to
+ *             the Point of Unification.
+ *             Use only if the region might be in user memory.
   *
- *             Ensure coherency between the I-cache and the D-cache in the
- *             region described by start, end.
- *             - start  - virtual start address
- *             - end    - virtual end address
+ *     icache_inval_pou(start, end)
   *
- *     __flush_dcache_area(kaddr, size)
+ *             Invalidate I-cache region to the Point of Unification.
   *
- *             Ensure that the data held in page is written back.
- *             - kaddr  - page address
- *             - size   - region size
+ *     dcache_clean_inval_poc(start, end)
+ *
+ *             Clean and invalidate D-cache region to the Point of Coherency.
+ *
+ *     dcache_inval_poc(start, end)
+ *
+ *             Invalidate D-cache region to the Point of Coherency.
+ *
+ *     dcache_clean_poc(start, end)
+ *
+ *             Clean D-cache region to the Point of Coherency.
+ *
+ *     dcache_clean_pop(start, end)
+ *
+ *             Clean D-cache region to the Point of Persistence.
+ *
+ *     dcache_clean_pou(start, end)
+ *
+ *             Clean D-cache region to the Point of Unification.
   */
-extern void __flush_icache_range(unsigned long start, unsigned long end);
-extern int  invalidate_icache_range(unsigned long start, unsigned long end);
-extern void __flush_dcache_area(void *addr, size_t len);
-extern void __inval_dcache_area(void *addr, size_t len);
-extern void __clean_dcache_area_poc(void *addr, size_t len);
-extern void __clean_dcache_area_pop(void *addr, size_t len);
-extern void __clean_dcache_area_pou(void *addr, size_t len);
-extern long __flush_cache_user_range(unsigned long start, unsigned long end);
-extern void sync_icache_aliases(void *kaddr, unsigned long len);
+extern void caches_clean_inval_pou(unsigned long start, unsigned long end);
+extern void icache_inval_pou(unsigned long start, unsigned long end);
+extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_pop(unsigned long start, unsigned long end);
+extern void dcache_clean_pou(unsigned long start, unsigned long end);
+extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
+extern void sync_icache_aliases(unsigned long start, unsigned long end);
  
  static inline void flush_icache_range(unsigned long start, unsigned long end)
  {
-       __flush_icache_range(start, end);
+       caches_clean_inval_pou(start, end);
  
         /*
          * IPI all online CPUs so that they undergo a context synchronization
@@ -122,7 +135,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
  #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
  extern void flush_dcache_page(struct page *);
  
-static __always_inline void __flush_icache_all(void)
+static __always_inline void icache_inval_all_pou(void)
  {
         if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
                 return;
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h

index 7faae6f..0f6d16f 100644 (file)
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -12,26 +12,7 @@
  /*
   * Records attributes of an individual CPU.
   */
-struct cpuinfo_arm64 {
-       struct cpu      cpu;
-       struct kobject  kobj;
-       u32             reg_ctr;
-       u32             reg_cntfrq;
-       u32             reg_dczid;
-       u32             reg_midr;
-       u32             reg_revidr;
-
-       u64             reg_id_aa64dfr0;
-       u64             reg_id_aa64dfr1;
-       u64             reg_id_aa64isar0;
-       u64             reg_id_aa64isar1;
-       u64             reg_id_aa64mmfr0;
-       u64             reg_id_aa64mmfr1;
-       u64             reg_id_aa64mmfr2;
-       u64             reg_id_aa64pfr0;
-       u64             reg_id_aa64pfr1;
-       u64             reg_id_aa64zfr0;
-
+struct cpuinfo_32bit {
         u32             reg_id_dfr0;
         u32             reg_id_dfr1;
         u32             reg_id_isar0;
@@ -54,6 +35,30 @@ struct cpuinfo_arm64 {
         u32             reg_mvfr0;
         u32             reg_mvfr1;
         u32             reg_mvfr2;
+};
+
+struct cpuinfo_arm64 {
+       struct cpu      cpu;
+       struct kobject  kobj;
+       u64             reg_ctr;
+       u64             reg_cntfrq;
+       u64             reg_dczid;
+       u64             reg_midr;
+       u64             reg_revidr;
+       u64             reg_gmid;
+
+       u64             reg_id_aa64dfr0;
+       u64             reg_id_aa64dfr1;
+       u64             reg_id_aa64isar0;
+       u64             reg_id_aa64isar1;
+       u64             reg_id_aa64mmfr0;
+       u64             reg_id_aa64mmfr1;
+       u64             reg_id_aa64mmfr2;
+       u64             reg_id_aa64pfr0;
+       u64             reg_id_aa64pfr1;
+       u64             reg_id_aa64zfr0;
+
+       struct cpuinfo_32bit    aarch32;
  
         /* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
         u64             reg_zcr;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h

index 338840c..9bb9d11 100644 (file)
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -619,6 +619,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
         return val > 0;
  }
  
+static inline bool id_aa64pfr1_mte(u64 pfr1)
+{
+       u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
+
+       return val >= ID_AA64PFR1_MTE;
+}
+
  void __init setup_cpu_features(void);
  void check_local_cpu_capabilities(void);
  
@@ -630,9 +637,15 @@ static inline bool cpu_supports_mixed_endian_el0(void)
         return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
  }
  
+const struct cpumask *system_32bit_el0_cpumask(void);
+DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
  static inline bool system_supports_32bit_el0(void)
  {
-       return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
+       u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+       return static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
+              id_aa64pfr0_32bit_el0(pfr0);
  }
  
  static inline bool system_supports_4kb_granule(void)
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h

index 3c5ddb4..14a19d1 100644 (file)
--- a/arch/arm64/include/asm/cpuidle.h
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -18,4 +18,39 @@ static inline int arm_cpuidle_suspend(int index)
         return -EOPNOTSUPP;
  }
  #endif
+
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#include <asm/arch_gicv3.h>
+
+struct arm_cpuidle_irq_context {
+       unsigned long pmr;              /* gic_read_pmr() result at save time */
+       unsigned long daif_bits;        /* daif sysreg value at save time */
+};
+
+#define arm_cpuidle_save_irq_context(__c)                              \
+       do {    /* stash daif + PMR in *__c, then rewrite both */       \
+               struct arm_cpuidle_irq_context *__ctx = (__c);          \
+               if (system_uses_irq_prio_masking()) {                   \
+                       __ctx->daif_bits = read_sysreg(daif);           \
+                       write_sysreg(__ctx->daif_bits | PSR_I_BIT | PSR_F_BIT, \
+                                    daif);                             \
+                       __ctx->pmr = gic_read_pmr();                    \
+                       gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); \
+               }                                                       \
+       } while (0)
+
+#define arm_cpuidle_restore_irq_context(__c)                           \
+       do {    /* write back PMR first, then daif, from *__c */        \
+               struct arm_cpuidle_irq_context *__ctx = (__c);          \
+               if (system_uses_irq_prio_masking()) {                   \
+                       gic_write_pmr(__ctx->pmr);                      \
+                       write_sysreg(__ctx->daif_bits, daif);           \
+               }                                                       \
+       } while (0)
+#else
+struct arm_cpuidle_irq_context { };     /* empty: the stubs below save nothing */
+
+#define arm_cpuidle_save_irq_context(c)                ((void)(c))
+#define arm_cpuidle_restore_irq_context(c)     ((void)(c))
+#endif
  #endif
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h

index 3578aba..1bed37e 100644 (file)
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -137,7 +137,7 @@ void efi_virtmap_unload(void);
  
  static inline void efi_capsule_flush_cache_range(void *addr, int size)
  {
-       __flush_dcache_area(addr, size);
+       dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
  }
  
  #endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h

index 6546158..4afbc45 100644 (file)
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -31,20 +31,35 @@ static inline u32 disr_to_esr(u64 disr)
         return esr;
  }
  
-asmlinkage void el1_sync_handler(struct pt_regs *regs);
-asmlinkage void el0_sync_handler(struct pt_regs *regs);
-asmlinkage void el0_sync_compat_handler(struct pt_regs *regs);
+asmlinkage void handle_bad_stack(struct pt_regs *regs);
  
-asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
-asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
+asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el1h_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_32_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_error_handler(struct pt_regs *regs);
+
+asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+                                 void (*func)(struct pt_regs *));
  asmlinkage void enter_from_user_mode(void);
  asmlinkage void exit_to_user_mode(void);
-void arm64_enter_nmi(struct pt_regs *regs);
-void arm64_exit_nmi(struct pt_regs *regs);
  void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
  void do_undefinstr(struct pt_regs *regs);
  void do_bti(struct pt_regs *regs);
-asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
  void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
                         struct pt_regs *regs);
  void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
@@ -57,4 +72,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs);
  void do_el0_svc(struct pt_regs *regs);
  void do_el0_svc_compat(struct pt_regs *regs);
  void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
+void do_serror(struct pt_regs *regs, unsigned int esr);
+
+void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
  #endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h

new file mode 100644 (file)

index 0000000..2c075f6
--- /dev/null
+++ b/arch/arm64/include/asm/insn-def.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_INSN_DEF_H
+#define __ASM_INSN_DEF_H
+
+/* A64 instructions are always 32 bits. */
+#define        AARCH64_INSN_SIZE               4
+
+#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h

index 4ebb9c0..6b776c8 100644 (file)
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -10,7 +10,7 @@
  #include <linux/build_bug.h>
  #include <linux/types.h>
  
-#include <asm/alternative.h>
+#include <asm/insn-def.h>
  
  #ifndef __ASSEMBLY__
  /*
@@ -30,6 +30,7 @@
   */
  enum aarch64_insn_encoding_class {
         AARCH64_INSN_CLS_UNKNOWN,       /* UNALLOCATED */
+       AARCH64_INSN_CLS_SVE,           /* SVE instructions */
         AARCH64_INSN_CLS_DP_IMM,        /* Data processing - immediate */
         AARCH64_INSN_CLS_DP_REG,        /* Data processing - register */
         AARCH64_INSN_CLS_DP_FPSIMD,     /* Data processing - SIMD and FP */
@@ -294,6 +295,12 @@ __AARCH64_INSN_FUNCS(adr,  0x9F000000, 0x10000000)
  __AARCH64_INSN_FUNCS(adrp,     0x9F000000, 0x90000000)
  __AARCH64_INSN_FUNCS(prfm,     0x3FC00000, 0x39800000)
  __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
+__AARCH64_INSN_FUNCS(store_imm,        0x3FC00000, 0x39000000)
+__AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000)
+__AARCH64_INSN_FUNCS(store_pre,        0x3FE00C00, 0x38000C00)
+__AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00)
+__AARCH64_INSN_FUNCS(store_post,       0x3FE00C00, 0x38000400)
+__AARCH64_INSN_FUNCS(load_post,        0x3FE00C00, 0x38400400)
  __AARCH64_INSN_FUNCS(str_reg,  0x3FE0EC00, 0x38206800)
  __AARCH64_INSN_FUNCS(ldadd,    0x3F20FC00, 0x38200000)
  __AARCH64_INSN_FUNCS(ldr_reg,  0x3FE0EC00, 0x38606800)
@@ -302,6 +309,8 @@ __AARCH64_INSN_FUNCS(ldrsw_lit,     0xFF000000, 0x98000000)
  __AARCH64_INSN_FUNCS(exclusive,        0x3F800000, 0x08000000)
  __AARCH64_INSN_FUNCS(load_ex,  0x3F400000, 0x08400000)
  __AARCH64_INSN_FUNCS(store_ex, 0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(stp,      0x7FC00000, 0x29000000)
+__AARCH64_INSN_FUNCS(ldp,      0x7FC00000, 0x29400000)
  __AARCH64_INSN_FUNCS(stp_post, 0x7FC00000, 0x28800000)
  __AARCH64_INSN_FUNCS(ldp_post, 0x7FC00000, 0x28C00000)
  __AARCH64_INSN_FUNCS(stp_pre,  0x7FC00000, 0x29800000)
@@ -334,6 +343,7 @@ __AARCH64_INSN_FUNCS(rev64, 0x7FFFFC00, 0x5AC00C00)
  __AARCH64_INSN_FUNCS(and,      0x7F200000, 0x0A000000)
  __AARCH64_INSN_FUNCS(bic,      0x7F200000, 0x0A200000)
  __AARCH64_INSN_FUNCS(orr,      0x7F200000, 0x2A000000)
+__AARCH64_INSN_FUNCS(mov_reg,  0x7FE0FFE0, 0x2A0003E0)
  __AARCH64_INSN_FUNCS(orn,      0x7F200000, 0x2A200000)
  __AARCH64_INSN_FUNCS(eor,      0x7F200000, 0x4A000000)
  __AARCH64_INSN_FUNCS(eon,      0x7F200000, 0x4A200000)
@@ -368,6 +378,14 @@ __AARCH64_INSN_FUNCS(eret_auth,    0xFFFFFBFF, 0xD69F0BFF)
  __AARCH64_INSN_FUNCS(mrs,      0xFFF00000, 0xD5300000)
  __AARCH64_INSN_FUNCS(msr_imm,  0xFFF8F01F, 0xD500401F)
  __AARCH64_INSN_FUNCS(msr_reg,  0xFFF00000, 0xD5100000)
+__AARCH64_INSN_FUNCS(dmb,      0xFFFFF0FF, 0xD50330BF)
+__AARCH64_INSN_FUNCS(dsb_base, 0xFFFFF0FF, 0xD503309F)
+__AARCH64_INSN_FUNCS(dsb_nxs,  0xFFFFF3FF, 0xD503323F)
+__AARCH64_INSN_FUNCS(isb,      0xFFFFF0FF, 0xD50330DF)
+__AARCH64_INSN_FUNCS(sb,       0xFFFFFFFF, 0xD50330FF)
+__AARCH64_INSN_FUNCS(clrex,    0xFFFFF0FF, 0xD503305F)
+__AARCH64_INSN_FUNCS(ssbb,     0xFFFFFFFF, 0xD503309F)
+__AARCH64_INSN_FUNCS(pssbb,    0xFFFFFFFF, 0xD503349F)
  
  #undef __AARCH64_INSN_FUNCS
  
@@ -379,8 +397,47 @@ static inline bool aarch64_insn_is_adr_adrp(u32 insn)
         return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn);
  }
  
-int aarch64_insn_read(void *addr, u32 *insnp);
-int aarch64_insn_write(void *addr, u32 insn);
+static inline bool aarch64_insn_is_dsb(u32 insn)
+{
+       return aarch64_insn_is_dsb_base(insn) || aarch64_insn_is_dsb_nxs(insn);
+}
+
+static inline bool aarch64_insn_is_barrier(u32 insn)
+{
+       return aarch64_insn_is_dmb(insn) || aarch64_insn_is_dsb(insn) ||
+              aarch64_insn_is_isb(insn) || aarch64_insn_is_sb(insn) ||
+              aarch64_insn_is_clrex(insn) || aarch64_insn_is_ssbb(insn) ||
+              aarch64_insn_is_pssbb(insn);
+}
+
+static inline bool aarch64_insn_is_store_single(u32 insn)
+{
+       return aarch64_insn_is_store_imm(insn) ||
+              aarch64_insn_is_store_pre(insn) ||
+              aarch64_insn_is_store_post(insn);
+}
+
+static inline bool aarch64_insn_is_store_pair(u32 insn)
+{
+       return aarch64_insn_is_stp(insn) ||
+              aarch64_insn_is_stp_pre(insn) ||
+              aarch64_insn_is_stp_post(insn);
+}
+
+static inline bool aarch64_insn_is_load_single(u32 insn)
+{
+       return aarch64_insn_is_load_imm(insn) ||
+              aarch64_insn_is_load_pre(insn) ||
+              aarch64_insn_is_load_post(insn);
+}
+
+static inline bool aarch64_insn_is_load_pair(u32 insn)
+{
+       return aarch64_insn_is_ldp(insn) ||
+              aarch64_insn_is_ldp_pre(insn) ||
+              aarch64_insn_is_ldp_post(insn);
+}
+
  enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
  bool aarch64_insn_uses_literal(u32 insn);
  bool aarch64_insn_is_branch(u32 insn);
@@ -487,9 +544,6 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
  s32 aarch64_get_branch_offset(u32 insn);
  u32 aarch64_set_branch_offset(u32 insn, s32 offset);
  
-int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
-int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
-
  s32 aarch64_insn_adrp_get_offset(u32 insn);
  u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
  
@@ -506,6 +560,7 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn);
  
  typedef bool (pstate_check_t)(unsigned long);
  extern pstate_check_t * const aarch32_opcode_cond_checks[16];
+
  #endif /* __ASSEMBLY__ */
  
  #endif /* __ASM_INSN_H */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h

index cf8df03..894edda 100644 (file)
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -8,6 +8,7 @@
  #define __ARM_KVM_ASM_H__
  
  #include <asm/hyp_image.h>
+#include <asm/insn.h>
  #include <asm/virt.h>
  
  #define ARM_EXIT_WITH_SERROR_BIT  31
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h

index 25ed956..f4cbfa9 100644 (file)
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -180,7 +180,8 @@ static inline void *__kvm_vector_slot2addr(void *base,
  
  struct kvm;
  
-#define kvm_flush_dcache_to_poc(a,l)   __flush_dcache_area((a), (l))
+#define kvm_flush_dcache_to_poc(a,l)   \
+       dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
  
  static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
  {
@@ -208,12 +209,12 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
  {
         if (icache_is_aliasing()) {
                 /* any kind of VIPT cache */
-               __flush_icache_all();
+               icache_inval_all_pou();
         } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
                 /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
                 void *va = page_address(pfn_to_page(pfn));
  
-               invalidate_icache_range((unsigned long)va,
+               icache_inval_pou((unsigned long)va,
                                         (unsigned long)va + size);
         }
  }
diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h

index ba89a9a..9906541 100644 (file)
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -56,8 +56,16 @@
                 SYM_FUNC_START_ALIAS(__pi_##x); \
                 SYM_FUNC_START_WEAK(x)
  
+#define SYM_FUNC_START_WEAK_ALIAS_PI(x)                \
+               SYM_FUNC_START_ALIAS(__pi_##x); \
+               SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN)
+
  #define SYM_FUNC_END_PI(x)                     \
                 SYM_FUNC_END(x);                \
                 SYM_FUNC_END_ALIAS(__pi_##x)
  
+#define SYM_FUNC_END_ALIAS_PI(x)               \
+               SYM_FUNC_END_ALIAS(x);          \
+               SYM_FUNC_END_ALIAS(__pi_##x)
+
  #endif
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h

index 8100456..a11ccad 100644 (file)
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,7 +1,20 @@
-#ifdef CONFIG_ARM64_MODULE_PLTS
  SECTIONS {
+#ifdef CONFIG_ARM64_MODULE_PLTS
         .plt 0 (NOLOAD) : { BYTE(0) }
         .init.plt 0 (NOLOAD) : { BYTE(0) }
         .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
-}
  #endif
+
+#ifdef CONFIG_KASAN_SW_TAGS
+       /*
+        * Outlined checks go into comdat-deduplicated sections named .text.hot.
+        * Because they are in comdats they are not combined by the linker and
+        * we otherwise end up with multiple sections with the same .text.hot
+        * name in the .ko file. The kernel module loader warns if it sees
+        * multiple sections with the same name so we use this sections
+        * directive to force them into a single section and silence the
+        * warning.
+        */
+       .text.hot : { *(.text.hot) }
+#endif
+}
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
new file mode 100644
index 0000000..6bf5adc
--- /dev/null
+++ b/arch/arm64/include/asm/patching.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef        __ASM_PATCHING_H
+#define        __ASM_PATCHING_H
+
+#include <linux/types.h>
+
+int aarch64_insn_read(void *addr, u32 *insnp);
+int aarch64_insn_write(void *addr, u32 insn);
+
+int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
+int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+
+#endif /* __ASM_PATCHING_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9df3fee..7dba1c4 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -257,8 +257,6 @@ void set_task_sctlr_el1(u64 sctlr);
  extern struct task_struct *cpu_switch_to(struct task_struct *prev,
                                          struct task_struct *next);
  
-asmlinkage void arm64_preempt_schedule_irq(void);
-
  #define task_pt_regs(p) \
         ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
  
@@ -329,13 +327,13 @@ long get_tagged_addr_ctrl(struct task_struct *task);
   * of header definitions for the use of task_stack_page.
   */
  
-#define current_top_of_stack()                                                 \
-({                                                                             \
-       struct stack_info _info;                                                \
-       BUG_ON(!on_accessible_stack(current, current_stack_pointer, &_info));   \
-       _info.high;                                                             \
+#define current_top_of_stack()                                                         \
+({                                                                                     \
+       struct stack_info _info;                                                        \
+       BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info));        \
+       _info.high;                                                                     \
  })
-#define on_thread_stack()      (on_task_stack(current, current_stack_pointer, NULL))
+#define on_thread_stack()      (on_task_stack(current, current_stack_pointer, 1, NULL))
  
  #endif /* __ASSEMBLY__ */
  #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index eaa2cd9..8297bcc 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,18 +9,18 @@
  #ifdef CONFIG_SHADOW_CALL_STACK
         scs_sp  .req    x18
  
-       .macro scs_load tsk, tmp
+       .macro scs_load tsk
         ldr     scs_sp, [\tsk, #TSK_TI_SCS_SP]
         .endm
  
-       .macro scs_save tsk, tmp
+       .macro scs_save tsk
         str     scs_sp, [\tsk, #TSK_TI_SCS_SP]
         .endm
  #else
-       .macro scs_load tsk, tmp
+       .macro scs_load tsk
         .endm
  
-       .macro scs_save tsk, tmp
+       .macro scs_save tsk
         .endm
  #endif /* CONFIG_SHADOW_CALL_STACK */
  
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 63e0b92..7bea1d7 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -37,13 +37,17 @@ struct sdei_registered_event;
  asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
                                         struct sdei_registered_event *arg);
  
+unsigned long do_sdei_event(struct pt_regs *regs,
+                           struct sdei_registered_event *arg);
+
  unsigned long sdei_arch_get_entry_point(int conduit);
  #define sdei_arch_get_entry_point(x)   sdei_arch_get_entry_point(x)
  
  struct stack_info;
  
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info);
-static inline bool on_sdei_stack(unsigned long sp,
+bool _on_sdei_stack(unsigned long sp, unsigned long size,
+                   struct stack_info *info);
+static inline bool on_sdei_stack(unsigned long sp, unsigned long size,
                                 struct stack_info *info)
  {
         if (!IS_ENABLED(CONFIG_VMAP_STACK))
@@ -51,7 +55,7 @@ static inline bool on_sdei_stack(unsigned long sp,
         if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
                 return false;
         if (in_nmi())
-               return _on_sdei_stack(sp, info);
+               return _on_sdei_stack(sp, size, info);
  
         return false;
  }
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 0e35775..fc55f5a 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -73,12 +73,10 @@ asmlinkage void secondary_start_kernel(void);
  
  /*
   * Initial data for bringing up a secondary CPU.
- * @stack  - sp for the secondary CPU
   * @status - Result passed back from the secondary CPU to
   *           indicate failure.
   */
  struct secondary_data {
-       void *stack;
         struct task_struct *task;
         long status;
  };
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4b33ca6..1801399 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -69,14 +69,14 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
  
  DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
  
-static inline bool on_stack(unsigned long sp, unsigned long low,
-                               unsigned long high, enum stack_type type,
-                               struct stack_info *info)
+static inline bool on_stack(unsigned long sp, unsigned long size,
+                           unsigned long low, unsigned long high,
+                           enum stack_type type, struct stack_info *info)
  {
         if (!low)
                 return false;
  
-       if (sp < low || sp >= high)
+       if (sp < low || sp + size < sp || sp + size > high)
                 return false;
  
         if (info) {
@@ -87,38 +87,38 @@ static inline bool on_stack(unsigned long sp, unsigned long low,
         return true;
  }
  
-static inline bool on_irq_stack(unsigned long sp,
+static inline bool on_irq_stack(unsigned long sp, unsigned long size,
                                 struct stack_info *info)
  {
         unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
         unsigned long high = low + IRQ_STACK_SIZE;
  
-       return on_stack(sp, low, high, STACK_TYPE_IRQ, info);
+       return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info);
  }
  
  static inline bool on_task_stack(const struct task_struct *tsk,
-                                unsigned long sp,
+                                unsigned long sp, unsigned long size,
                                  struct stack_info *info)
  {
         unsigned long low = (unsigned long)task_stack_page(tsk);
         unsigned long high = low + THREAD_SIZE;
  
-       return on_stack(sp, low, high, STACK_TYPE_TASK, info);
+       return on_stack(sp, size, low, high, STACK_TYPE_TASK, info);
  }
  
  #ifdef CONFIG_VMAP_STACK
  DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
  
-static inline bool on_overflow_stack(unsigned long sp,
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
                                 struct stack_info *info)
  {
         unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
         unsigned long high = low + OVERFLOW_STACK_SIZE;
  
-       return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info);
+       return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
  }
  #else
-static inline bool on_overflow_stack(unsigned long sp,
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
                         struct stack_info *info) { return false; }
  #endif
  
@@ -128,21 +128,21 @@ static inline bool on_overflow_stack(unsigned long sp,
   * context.
   */
  static inline bool on_accessible_stack(const struct task_struct *tsk,
-                                      unsigned long sp,
+                                      unsigned long sp, unsigned long size,
                                        struct stack_info *info)
  {
         if (info)
                 info->type = STACK_TYPE_UNKNOWN;
  
-       if (on_task_stack(tsk, sp, info))
+       if (on_task_stack(tsk, sp, size, info))
                 return true;
         if (tsk != current || preemptible())
                 return false;
-       if (on_irq_stack(sp, info))
+       if (on_irq_stack(sp, size, info))
                 return true;
-       if (on_overflow_stack(sp, info))
+       if (on_overflow_stack(sp, size, info))
                 return true;
-       if (on_sdei_stack(sp, info))
+       if (on_sdei_stack(sp, size, info))
                 return true;
  
         return false;
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6cc9773..cce3085 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -14,15 +14,22 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
  CFLAGS_REMOVE_syscall.o         = -fstack-protector -fstack-protector-strong
  CFLAGS_syscall.o       += -fno-stack-protector
  
+# It's not safe to invoke KCOV when portions of the kernel environment aren't
+# available or are out-of-sync with HW state. Since `noinstr` doesn't always
+# inhibit KCOV instrumentation, disable it for the entire compilation unit.
+KCOV_INSTRUMENT_entry.o := n
+KCOV_INSTRUMENT_idle.o := n
+
  # Object file lists.
  obj-y                  := debug-monitors.o entry.o irq.o fpsimd.o              \
                            entry-common.o entry-fpsimd.o process.o ptrace.o     \
                            setup.o signal.o sys.o stacktrace.o time.o traps.o   \
-                          io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o       \
+                          io.o vdso.o hyp-stub.o psci.o cpu_ops.o              \
                            return_address.o cpuinfo.o cpu_errata.o              \
                            cpufeature.o alternative.o cacheinfo.o               \
                            smp.o smp_spin_table.o topology.o smccc-call.o       \
-                          syscall.o proton-pack.o idreg-override.o
+                          syscall.o proton-pack.o idreg-override.o idle.o      \
+                          patching.o
  
  targets                        += efi-entry.o
  
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index c906d20..3fb79b7 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -181,7 +181,7 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu
          */
         if (!is_module) {
                 dsb(ish);
-               __flush_icache_all();
+               icache_inval_all_pou();
                 isb();
  
                 /* Ignore ARM64_CB bit from feature mask */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34cc..bd0fc23 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -27,6 +27,7 @@
  int main(void)
  {
    DEFINE(TSK_ACTIVE_MM,                offsetof(struct task_struct, active_mm));
+  DEFINE(TSK_CPU,              offsetof(struct task_struct, cpu));
    BLANK();
    DEFINE(TSK_TI_FLAGS,         offsetof(struct task_struct, thread_info.flags));
    DEFINE(TSK_TI_PREEMPT,       offsetof(struct task_struct, thread_info.preempt_count));
@@ -99,7 +100,6 @@ int main(void)
    DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
    DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
    BLANK();
-  DEFINE(CPU_BOOT_STACK,       offsetof(struct secondary_data, stack));
    DEFINE(CPU_BOOT_TASK,                offsetof(struct secondary_data, task));
    BLANK();
    DEFINE(FTR_OVR_VAL_OFFSET,   offsetof(struct arm64_ftr_override, val));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index efed283..125d5c9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -76,6 +76,7 @@
  #include <asm/cpufeature.h>
  #include <asm/cpu_ops.h>
  #include <asm/fpsimd.h>
+#include <asm/insn.h>
  #include <asm/kvm_host.h>
  #include <asm/mmu_context.h>
  #include <asm/mte.h>
@@ -107,6 +108,24 @@ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
  bool arm64_use_ng_mappings = false;
  EXPORT_SYMBOL(arm64_use_ng_mappings);
  
+/*
+ * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs
+ * support it?
+ */
+static bool __read_mostly allow_mismatched_32bit_el0;
+
+/*
+ * Static branch enabled only if allow_mismatched_32bit_el0 is set and we have
+ * seen at least one CPU capable of 32-bit EL0.
+ */
+DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
+/*
+ * Mask of CPUs supporting 32-bit EL0.
+ * Only valid if arm64_mismatched_32bit_el0 is enabled.
+ */
+static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly;
+
  /*
   * Flag to indicate if we have computed the system wide
   * capabilities based on the boot time active CPUs. This
@@ -400,6 +419,11 @@ static const struct arm64_ftr_bits ftr_dczid[] = {
         ARM64_FTR_END,
  };
  
+static const struct arm64_ftr_bits ftr_gmid[] = {
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0),
+       ARM64_FTR_END,
+};
+
  static const struct arm64_ftr_bits ftr_id_isar0[] = {
         ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0),
         ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0),
@@ -617,6 +641,9 @@ static const struct __ftr_reg_entry {
         /* Op1 = 0, CRn = 1, CRm = 2 */
         ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
  
+       /* Op1 = 1, CRn = 0, CRm = 0 */
+       ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
+
         /* Op1 = 3, CRn = 0, CRm = 0 */
         { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
         ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
@@ -767,7 +794,7 @@ static void __init sort_ftr_regs(void)
   * Any bits that are not covered by an arm64_ftr_bits entry are considered
   * RES0 for the system-wide value, and must strictly match.
   */
-static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
+static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
  {
         u64 val = 0;
         u64 strict_mask = ~0x0ULL;
@@ -863,6 +890,31 @@ static void __init init_cpu_hwcaps_indirect_list(void)
  
  static void __init setup_boot_cpu_capabilities(void);
  
+static void init_32bit_cpu_features(struct cpuinfo_32bit *info)
+{
+       init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+       init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
+       init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+       init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+       init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+       init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+       init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+       init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+       init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
+       init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+       init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+       init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+       init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+       init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
+       init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
+       init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+       init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+       init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
+       init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+       init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+       init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+}
+
  void __init init_cpu_features(struct cpuinfo_arm64 *info)
  {
         /* Before we start using the tables, make sure it is sorted */
@@ -882,35 +934,17 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
         init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
         init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
  
-       if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
-               init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
-               init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
-               init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
-               init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
-               init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
-               init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
-               init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
-               init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
-               init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
-               init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
-               init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
-               init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
-               init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
-               init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
-               init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
-               init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
-               init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
-               init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
-               init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
-               init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
-               init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
-       }
+       if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
+               init_32bit_cpu_features(&info->aarch32);
  
         if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
                 init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
                 sve_init_vq_map();
         }
  
+       if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+               init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
+
         /*
          * Initialize the indirect array of CPU hwcaps capabilities pointers
          * before we handle the boot CPU below.
@@ -975,20 +1009,28 @@ static void relax_cpu_ftr_reg(u32 sys_id, int field)
         WARN_ON(!ftrp->width);
  }
  
-static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info,
-                                    struct cpuinfo_arm64 *boot)
+static void lazy_init_32bit_cpu_features(struct cpuinfo_arm64 *info,
+                                        struct cpuinfo_arm64 *boot)
+{
+       static bool boot_cpu_32bit_regs_overridden = false;
+
+       if (!allow_mismatched_32bit_el0 || boot_cpu_32bit_regs_overridden)
+               return;
+
+       if (id_aa64pfr0_32bit_el0(boot->reg_id_aa64pfr0))
+               return;
+
+       boot->aarch32 = info->aarch32;
+       init_32bit_cpu_features(&boot->aarch32);
+       boot_cpu_32bit_regs_overridden = true;
+}
+
+static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info,
+                                    struct cpuinfo_32bit *boot)
  {
         int taint = 0;
         u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
  
-       /*
-        * If we don't have AArch32 at all then skip the checks entirely
-        * as the register values may be UNKNOWN and we're not going to be
-        * using them for anything.
-        */
-       if (!id_aa64pfr0_32bit_el0(pfr0))
-               return taint;
-
         /*
          * If we don't have AArch32 at EL1, then relax the strictness of
          * EL1-dependent register fields to avoid spurious sanity check fails.
@@ -1135,10 +1177,29 @@ void update_cpu_features(int cpu,
         }
  
         /*
+        * The kernel uses the LDGM/STGM instructions and the number of tags
+        * they read/write depends on the GMID_EL1.BS field. Check that the
+        * value is the same on all CPUs.
+        */
+       if (IS_ENABLED(CONFIG_ARM64_MTE) &&
+           id_aa64pfr1_mte(info->reg_id_aa64pfr1)) {
+               taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu,
+                                             info->reg_gmid, boot->reg_gmid);
+       }
+
+       /*
+        * If we don't have AArch32 at all then skip the checks entirely
+        * as the register values may be UNKNOWN and we're not going to be
+        * using them for anything.
+        *
          * This relies on a sanitised view of the AArch64 ID registers
          * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last.
          */
-       taint |= update_32bit_cpu_features(cpu, info, boot);
+       if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+               lazy_init_32bit_cpu_features(info, boot);
+               taint |= update_32bit_cpu_features(cpu, &info->aarch32,
+                                                  &boot->aarch32);
+       }
  
         /*
          * Mismatched CPU features are a recipe for disaster. Don't even
@@ -1248,6 +1309,28 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
         return feature_matches(val, entry);
  }
  
+const struct cpumask *system_32bit_el0_cpumask(void)
+{
+       if (!system_supports_32bit_el0())
+               return cpu_none_mask;
+
+       if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+               return cpu_32bit_el0_mask;
+
+       return cpu_possible_mask;
+}
+
+static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope)
+{
+       if (!has_cpuid_feature(entry, scope))
+               return allow_mismatched_32bit_el0;
+
+       if (scope == SCOPE_SYSTEM)
+               pr_info("detected: 32-bit EL0 Support\n");
+
+       return true;
+}
+
  static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope)
  {
         bool has_sre;
@@ -1866,10 +1949,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                 .cpu_enable = cpu_copy_el2regs,
         },
         {
-               .desc = "32-bit EL0 Support",
-               .capability = ARM64_HAS_32BIT_EL0,
+               .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
                 .type = ARM64_CPUCAP_SYSTEM_FEATURE,
-               .matches = has_cpuid_feature,
+               .matches = has_32bit_el0,
                 .sys_reg = SYS_ID_AA64PFR0_EL1,
                 .sign = FTR_UNSIGNED,
                 .field_pos = ID_AA64PFR0_EL0_SHIFT,
@@ -2378,7 +2460,7 @@ static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
         {},
  };
  
-static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
+static void cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
  {
         switch (cap->hwcap_type) {
         case CAP_HWCAP:
@@ -2423,7 +2505,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
         return rc;
  }
  
-static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
+static void setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
  {
         /* We support emulation of accesses to CPU ID feature registers */
         cpu_set_named_feature(CPUID);
@@ -2598,7 +2680,7 @@ static void check_early_cpu_features(void)
  }
  
  static void
-verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
+__verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
  {
  
         for (; caps->matches; caps++)
@@ -2609,6 +2691,14 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
                 }
  }
  
+static void verify_local_elf_hwcaps(void)
+{
+       __verify_local_elf_hwcaps(arm64_elf_hwcaps);
+
+       if (id_aa64pfr0_32bit_el0(read_cpuid(ID_AA64PFR0_EL1)))
+               __verify_local_elf_hwcaps(compat_elf_hwcaps);
+}
+
  static void verify_sve_features(void)
  {
         u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
@@ -2673,11 +2763,7 @@ static void verify_local_cpu_capabilities(void)
          * on all secondary CPUs.
          */
         verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU);
-
-       verify_local_elf_hwcaps(arm64_elf_hwcaps);
-
-       if (system_supports_32bit_el0())
-               verify_local_elf_hwcaps(compat_elf_hwcaps);
+       verify_local_elf_hwcaps();
  
         if (system_supports_sve())
                 verify_sve_features();
@@ -2812,6 +2898,34 @@ void __init setup_cpu_features(void)
                         ARCH_DMA_MINALIGN);
  }
  
+static int enable_mismatched_32bit_el0(unsigned int cpu)
+{
+       struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
+       bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
+
+       if (cpu_32bit) {
+               cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
+               static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0);
+               setup_elf_hwcaps(compat_elf_hwcaps);
+       }
+
+       return 0;
+}
+
+static int __init init_32bit_el0_mask(void)
+{
+       if (!allow_mismatched_32bit_el0)
+               return 0;
+
+       if (!zalloc_cpumask_var(&cpu_32bit_el0_mask, GFP_KERNEL))
+               return -ENOMEM;
+
+       return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+                                "arm64/mismatched_32bit_el0:online",
+                                enable_mismatched_32bit_el0, NULL);
+}
+subsys_initcall_sync(init_32bit_el0_mask);
+
  static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
  {
         cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
@@ -2905,8 +3019,8 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn)
  }
  
  static struct undef_hook mrs_hook = {
-       .instr_mask = 0xfff00000,
-       .instr_val  = 0xd5300000,
+       .instr_mask = 0xffff0000,
+       .instr_val  = 0xd5380000,
         .pstate_mask = PSR_AA32_MODE_MASK,
         .pstate_val = PSR_MODE_EL0t,
         .fn = emulate_mrs,
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 51fcf99..87731fe 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -246,7 +246,7 @@ static struct kobj_type cpuregs_kobj_type = {
                 struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj);             \
                                                                                 \
                 if (info->reg_midr)                                             \
-                       return sprintf(buf, "0x%016x\n", info->reg_##_field);   \
+                       return sprintf(buf, "0x%016llx\n", info->reg_##_field); \
                 else                                                            \
                         return 0;                                               \
         }                                                                       \
@@ -344,6 +344,32 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
         pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
  }
  
+static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info)
+{
+       info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+       info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
+       info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+       info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+       info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+       info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+       info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+       info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+       info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
+       info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+       info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+       info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+       info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+       info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
+       info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
+       info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+       info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+       info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
+
+       info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
+       info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
+       info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+}
+
  static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
  {
         info->reg_cntfrq = arch_timer_get_cntfrq();
@@ -371,31 +397,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
         info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
         info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
  
-       /* Update the 32bit ID registers only if AArch32 is implemented */
-       if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
-               info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
-               info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
-               info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
-               info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
-               info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
-               info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
-               info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
-               info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
-               info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
-               info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
-               info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
-               info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
-               info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
-               info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
-               info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
-               info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
-               info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
-               info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
-
-               info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
-               info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
-               info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
-       }
+       if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+               info->reg_gmid = read_cpuid(GMID_EL1);
+
+       if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
+               __cpuinfo_store_cpu_32bit(&info->aarch32);
  
         if (IS_ENABLED(CONFIG_ARM64_SVE) &&
             id_aa64pfr0_sve(info->reg_id_aa64pfr0))
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 0073b24..61a87fa 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -28,7 +28,8 @@ SYM_CODE_START(efi_enter_kernel)
          * stale icache entries from before relocation.
          */
         ldr     w1, =kernel_size
-       bl      __clean_dcache_area_poc
+       add     x1, x0, x1
+       bl      dcache_clean_poc
         ic      ialluis
  
         /*
@@ -36,8 +37,8 @@ SYM_CODE_START(efi_enter_kernel)
          * so that we can safely disable the MMU and caches.
          */
         adr     x0, 0f
-       ldr     w1, 3f
-       bl      __clean_dcache_area_poc
+       adr     x1, 3f
+       bl      dcache_clean_poc
  0:
         /* Turn off Dcache and MMU */
         mrs     x0, CurrentEL
@@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel)
         mov     x2, xzr
         mov     x3, xzr
         br      x19
+3:
  SYM_CODE_END(efi_enter_kernel)
-3:     .long   . - 0b
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c

index 340d04e..12ce14a 100644 (file)
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,7 +6,11 @@
   */
  
  #include <linux/context_tracking.h>
+#include <linux/linkage.h>
+#include <linux/lockdep.h>
  #include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
  #include <linux/thread_info.h>
  
  #include <asm/cpufeature.h>
@@ -15,7 +19,11 @@
  #include <asm/exception.h>
  #include <asm/kprobes.h>
  #include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/sdei.h>
+#include <asm/stacktrace.h>
  #include <asm/sysreg.h>
+#include <asm/system_misc.h>
  
  /*
   * This is intended to match the logic in irqentry_enter(), handling the kernel
@@ -67,7 +75,7 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
         }
  }
  
-void noinstr arm64_enter_nmi(struct pt_regs *regs)
+static void noinstr arm64_enter_nmi(struct pt_regs *regs)
  {
         regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
  
@@ -80,7 +88,7 @@ void noinstr arm64_enter_nmi(struct pt_regs *regs)
         ftrace_nmi_enter();
  }
  
-void noinstr arm64_exit_nmi(struct pt_regs *regs)
+static void noinstr arm64_exit_nmi(struct pt_regs *regs)
  {
         bool restore = regs->lockdep_hardirqs;
  
@@ -97,7 +105,7 @@ void noinstr arm64_exit_nmi(struct pt_regs *regs)
         __nmi_exit();
  }
  
-asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
+static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
  {
         if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
                 arm64_enter_nmi(regs);
@@ -105,7 +113,7 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
                 enter_from_kernel_mode(regs);
  }
  
-asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
+static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
  {
         if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
                 arm64_exit_nmi(regs);
@@ -113,6 +121,65 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
                 exit_to_kernel_mode(regs);
  }
  
+static void __sched arm64_preempt_schedule_irq(void)
+{
+       lockdep_assert_irqs_disabled();
+
+       /*
+        * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
+        * priority masking is used the GIC irqchip driver will clear DAIF.IF
+        * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
+        * DAIF we must have handled an NMI, so skip preemption.
+        */
+       if (system_uses_irq_prio_masking() && read_sysreg(daif))
+               return;
+
+       /*
+        * Preempting a task from an IRQ means we leave copies of PSTATE
+        * on the stack. cpufeature's enable calls may modify PSTATE, but
+        * resuming one of these preempted tasks would undo those changes.
+        *
+        * Only allow a task to be preempted once cpufeatures have been
+        * enabled.
+        */
+       if (system_capabilities_finalized())
+               preempt_schedule_irq();
+}
+
+static void do_interrupt_handler(struct pt_regs *regs,
+                                void (*handler)(struct pt_regs *))
+{
+       if (on_thread_stack())
+               call_on_irq_stack(regs, handler);
+       else
+               handler(regs);
+}
+
+extern void (*handle_arch_irq)(struct pt_regs *);
+extern void (*handle_arch_fiq)(struct pt_regs *);
+
+static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
+                                     unsigned int esr)
+{
+       arm64_enter_nmi(regs);
+
+       console_verbose();
+
+       pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n",
+               vector, smp_processor_id(), esr,
+               esr_get_class_string(esr));
+
+       __show_regs(regs);
+       panic("Unhandled exception");
+}
+
+#define UNHANDLED(el, regsize, vector)                                                 \
+asmlinkage void noinstr el##_##regsize##_##vector##_handler(struct pt_regs *regs)      \
+{                                                                                      \
+       const char *desc = #regsize "-bit " #el " " #vector;                            \
+       __panic_unhandled(regs, desc, read_sysreg(esr_el1));                            \
+}
+
  #ifdef CONFIG_ARM64_ERRATUM_1463225
  static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
  
@@ -162,6 +229,11 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
  }
  #endif /* CONFIG_ARM64_ERRATUM_1463225 */
  
+UNHANDLED(el1t, 64, sync)
+UNHANDLED(el1t, 64, irq)
+UNHANDLED(el1t, 64, fiq)
+UNHANDLED(el1t, 64, error)
+
  static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
  {
         unsigned long far = read_sysreg(far_el1);
@@ -193,15 +265,6 @@ static void noinstr el1_undef(struct pt_regs *regs)
         exit_to_kernel_mode(regs);
  }
  
-static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
-{
-       enter_from_kernel_mode(regs);
-       local_daif_inherit(regs);
-       bad_mode(regs, 0, esr);
-       local_daif_mask();
-       exit_to_kernel_mode(regs);
-}
-
  static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
  {
         regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -245,7 +308,7 @@ static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
         exit_to_kernel_mode(regs);
  }
  
-asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
  {
         unsigned long esr = read_sysreg(esr_el1);
  
@@ -275,10 +338,50 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
                 el1_fpac(regs, esr);
                 break;
         default:
-               el1_inv(regs, esr);
+               __panic_unhandled(regs, "64-bit el1h sync", esr);
         }
  }
  
+static void noinstr el1_interrupt(struct pt_regs *regs,
+                                 void (*handler)(struct pt_regs *))
+{
+       write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+       enter_el1_irq_or_nmi(regs);
+       do_interrupt_handler(regs, handler);
+
+       /*
+        * Note: thread_info::preempt_count includes both thread_info::count
+        * and thread_info::need_resched, and is not equivalent to
+        * preempt_count().
+        */
+       if (IS_ENABLED(CONFIG_PREEMPTION) &&
+           READ_ONCE(current_thread_info()->preempt_count) == 0)
+               arm64_preempt_schedule_irq();
+
+       exit_el1_irq_or_nmi(regs);
+}
+
+asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs)
+{
+       el1_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
+{
+       el1_interrupt(regs, handle_arch_fiq);
+}
+
+asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
+{
+       unsigned long esr = read_sysreg(esr_el1);
+
+       local_daif_restore(DAIF_ERRCTX);
+       arm64_enter_nmi(regs);
+       do_serror(regs, esr);
+       arm64_exit_nmi(regs);
+}
+
  asmlinkage void noinstr enter_from_user_mode(void)
  {
         lockdep_hardirqs_off(CALLER_ADDR0);
@@ -398,7 +501,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
  
         enter_from_user_mode();
         do_debug_exception(far, esr, regs);
-       local_daif_restore(DAIF_PROCCTX_NOIRQ);
+       local_daif_restore(DAIF_PROCCTX);
  }
  
  static void noinstr el0_svc(struct pt_regs *regs)
@@ -415,7 +518,7 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
         do_ptrauth_fault(regs, esr);
  }
  
-asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
  {
         unsigned long esr = read_sysreg(esr_el1);
  
@@ -468,6 +571,56 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
         }
  }
  
+static void noinstr el0_interrupt(struct pt_regs *regs,
+                                 void (*handler)(struct pt_regs *))
+{
+       enter_from_user_mode();
+
+       write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+       if (regs->pc & BIT(55))
+               arm64_apply_bp_hardening();
+
+       do_interrupt_handler(regs, handler);
+}
+
+static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
+{
+       el0_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs)
+{
+       __el0_irq_handler_common(regs);
+}
+
+static void noinstr __el0_fiq_handler_common(struct pt_regs *regs)
+{
+       el0_interrupt(regs, handle_arch_fiq);
+}
+
+asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
+{
+       __el0_fiq_handler_common(regs);
+}
+
+static void noinstr __el0_error_handler_common(struct pt_regs *regs)
+{
+       unsigned long esr = read_sysreg(esr_el1);
+
+       enter_from_user_mode();
+       local_daif_restore(DAIF_ERRCTX);
+       arm64_enter_nmi(regs);
+       do_serror(regs, esr);
+       arm64_exit_nmi(regs);
+       local_daif_restore(DAIF_PROCCTX);
+}
+
+asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
+{
+       __el0_error_handler_common(regs);
+}
+
  #ifdef CONFIG_COMPAT
  static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
  {
@@ -483,7 +636,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
         do_el0_svc_compat(regs);
  }
  
-asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
  {
         unsigned long esr = read_sysreg(esr_el1);
  
@@ -526,4 +679,71 @@ asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
                 el0_inv(regs, esr);
         }
  }
+
+asmlinkage void noinstr el0t_32_irq_handler(struct pt_regs *regs)
+{
+       __el0_irq_handler_common(regs);
+}
+
+asmlinkage void noinstr el0t_32_fiq_handler(struct pt_regs *regs)
+{
+       __el0_fiq_handler_common(regs);
+}
+
+asmlinkage void noinstr el0t_32_error_handler(struct pt_regs *regs)
+{
+       __el0_error_handler_common(regs);
+}
+#else /* CONFIG_COMPAT */
+UNHANDLED(el0t, 32, sync)
+UNHANDLED(el0t, 32, irq)
+UNHANDLED(el0t, 32, fiq)
+UNHANDLED(el0t, 32, error)
  #endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_VMAP_STACK
+asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
+{
+       unsigned int esr = read_sysreg(esr_el1);
+       unsigned long far = read_sysreg(far_el1);
+
+       arm64_enter_nmi(regs);
+       panic_bad_stack(regs, esr, far);
+}
+#endif /* CONFIG_VMAP_STACK */
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+asmlinkage noinstr unsigned long
+__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
+{
+       unsigned long ret;
+
+       /*
+        * We didn't take an exception to get here, so the HW hasn't
+        * set/cleared bits in PSTATE that we may rely on.
+        *
+        * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
+        * whether PSTATE bits are inherited unchanged or generated from
+        * scratch, and the TF-A implementation always clears PAN and always
+        * clears UAO. There are no other known implementations.
+        *
+        * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
+        * PSTATE is modified upon architectural exceptions, and so PAN is
+        * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
+        * cleared.
+        *
+        * We must explicitly reset PAN to the expected state, including
+        * clearing it when the host isn't using it, in case a VM had it set.
+        */
+       if (system_uses_hw_pan())
+               set_pstate_pan(1);
+       else if (cpu_has_pan())
+               set_pstate_pan(0);
+
+       arm64_enter_nmi(regs);
+       ret = do_sdei_event(regs, arg);
+       arm64_exit_nmi(regs);
+
+       return ret;
+}
+#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S

index 3513984..863d44f 100644 (file)
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -33,12 +33,6 @@
   * Context tracking and irqflag tracing need to instrument transitions between
   * user and kernel mode.
   */
-       .macro user_exit_irqoff
-#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
-       bl      enter_from_user_mode
-#endif
-       .endm
-
         .macro user_enter_irqoff
  #if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
         bl      exit_to_user_mode
@@ -51,16 +45,7 @@
         .endr
         .endm
  
-/*
- * Bad Abort numbers
- *-----------------
- */
-#define BAD_SYNC       0
-#define BAD_IRQ                1
-#define BAD_FIQ                2
-#define BAD_ERROR      3
-
-       .macro kernel_ventry, el, label, regsize = 64
+       .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
         .align 7
  #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
         .if     \el == 0
@@ -87,7 +72,7 @@ alternative_else_nop_endif
         tbnz    x0, #THREAD_SHIFT, 0f
         sub     x0, sp, x0                      // x0'' = sp' - x0' = (sp + x0) - sp = x0
         sub     sp, sp, x0                      // sp'' = sp' - x0 = (sp + x0) - x0 = sp
-       b       el\()\el\()_\label
+       b       el\el\ht\()_\regsize\()_\label
  
  0:
         /*
@@ -119,7 +104,7 @@ alternative_else_nop_endif
         sub     sp, sp, x0
         mrs     x0, tpidrro_el0
  #endif
-       b       el\()\el\()_\label
+       b       el\el\ht\()_\regsize\()_\label
         .endm
  
         .macro tramp_alias, dst, sym
@@ -275,7 +260,7 @@ alternative_else_nop_endif
  
         mte_set_kernel_gcr x22, x23
  
-       scs_load tsk, x20
+       scs_load tsk
         .else
         add     x21, sp, #PT_REGS_SIZE
         get_current_task tsk
@@ -285,7 +270,7 @@ alternative_else_nop_endif
         stp     lr, x21, [sp, #S_LR]
  
         /*
-        * For exceptions from EL0, create a terminal frame record.
+        * For exceptions from EL0, create a final frame record.
          * For exceptions from EL1, create a synthetic frame record so the
          * interrupted code shows up in the backtrace.
          */
@@ -375,7 +360,7 @@ alternative_if ARM64_WORKAROUND_845719
  alternative_else_nop_endif
  #endif
  3:
-       scs_save tsk, x0
+       scs_save tsk
  
  #ifdef CONFIG_ARM64_PTR_AUTH
  alternative_if ARM64_HAS_ADDRESS_AUTH
@@ -486,63 +471,12 @@ SYM_CODE_START_LOCAL(__swpan_exit_el0)
  SYM_CODE_END(__swpan_exit_el0)
  #endif
  
-       .macro  irq_stack_entry
-       mov     x19, sp                 // preserve the original sp
-#ifdef CONFIG_SHADOW_CALL_STACK
-       mov     x24, scs_sp             // preserve the original shadow stack
-#endif
-
-       /*
-        * Compare sp with the base of the task stack.
-        * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
-        * and should switch to the irq stack.
-        */
-       ldr     x25, [tsk, TSK_STACK]
-       eor     x25, x25, x19
-       and     x25, x25, #~(THREAD_SIZE - 1)
-       cbnz    x25, 9998f
-
-       ldr_this_cpu x25, irq_stack_ptr, x26
-       mov     x26, #IRQ_STACK_SIZE
-       add     x26, x25, x26
-
-       /* switch to the irq stack */
-       mov     sp, x26
-
-#ifdef CONFIG_SHADOW_CALL_STACK
-       /* also switch to the irq shadow stack */
-       ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x26
-#endif
-
-9998:
-       .endm
-
-       /*
-        * The callee-saved regs (x19-x29) should be preserved between
-        * irq_stack_entry and irq_stack_exit, but note that kernel_entry
-        * uses x20-x23 to store data for later use.
-        */
-       .macro  irq_stack_exit
-       mov     sp, x19
-#ifdef CONFIG_SHADOW_CALL_STACK
-       mov     scs_sp, x24
-#endif
-       .endm
-
  /* GPRs used by entry code */
  tsk    .req    x28             // current thread_info
  
  /*
   * Interrupt handling.
   */
-       .macro  irq_handler, handler:req
-       ldr_l   x1, \handler
-       mov     x0, sp
-       irq_stack_entry
-       blr     x1
-       irq_stack_exit
-       .endm
-
         .macro  gic_prio_kentry_setup, tmp:req
  #ifdef CONFIG_ARM64_PSEUDO_NMI
         alternative_if ARM64_HAS_IRQ_PRIO_MASKING
@@ -552,45 +486,6 @@ tsk        .req    x28             // current thread_info
  #endif
         .endm
  
-       .macro el1_interrupt_handler, handler:req
-       enable_da
-
-       mov     x0, sp
-       bl      enter_el1_irq_or_nmi
-
-       irq_handler     \handler
-
-#ifdef CONFIG_PREEMPTION
-       ldr     x24, [tsk, #TSK_TI_PREEMPT]     // get preempt count
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
-       /*
-        * DA were cleared at start of handling, and IF are cleared by
-        * the GIC irqchip driver using gic_arch_enable_irqs() for
-        * normal IRQs. If anything is set, it means we come back from
-        * an NMI instead of a normal IRQ, so skip preemption
-        */
-       mrs     x0, daif
-       orr     x24, x24, x0
-alternative_else_nop_endif
-       cbnz    x24, 1f                         // preempt count != 0 || NMI return path
-       bl      arm64_preempt_schedule_irq      // irq en/disable is done inside
-1:
-#endif
-
-       mov     x0, sp
-       bl      exit_el1_irq_or_nmi
-       .endm
-
-       .macro el0_interrupt_handler, handler:req
-       user_exit_irqoff
-       enable_da
-
-       tbz     x22, #55, 1f
-       bl      do_el0_irq_bp_hardening
-1:
-       irq_handler     \handler
-       .endm
-
         .text
  
  /*
@@ -600,32 +495,25 @@ alternative_else_nop_endif
  
         .align  11
  SYM_CODE_START(vectors)
-       kernel_ventry   1, sync_invalid                 // Synchronous EL1t
-       kernel_ventry   1, irq_invalid                  // IRQ EL1t
-       kernel_ventry   1, fiq_invalid                  // FIQ EL1t
-       kernel_ventry   1, error_invalid                // Error EL1t
-
-       kernel_ventry   1, sync                         // Synchronous EL1h
-       kernel_ventry   1, irq                          // IRQ EL1h
-       kernel_ventry   1, fiq                          // FIQ EL1h
-       kernel_ventry   1, error                        // Error EL1h
-
-       kernel_ventry   0, sync                         // Synchronous 64-bit EL0
-       kernel_ventry   0, irq                          // IRQ 64-bit EL0
-       kernel_ventry   0, fiq                          // FIQ 64-bit EL0
-       kernel_ventry   0, error                        // Error 64-bit EL0
-
-#ifdef CONFIG_COMPAT
-       kernel_ventry   0, sync_compat, 32              // Synchronous 32-bit EL0
-       kernel_ventry   0, irq_compat, 32               // IRQ 32-bit EL0
-       kernel_ventry   0, fiq_compat, 32               // FIQ 32-bit EL0
-       kernel_ventry   0, error_compat, 32             // Error 32-bit EL0
-#else
-       kernel_ventry   0, sync_invalid, 32             // Synchronous 32-bit EL0
-       kernel_ventry   0, irq_invalid, 32              // IRQ 32-bit EL0
-       kernel_ventry   0, fiq_invalid, 32              // FIQ 32-bit EL0
-       kernel_ventry   0, error_invalid, 32            // Error 32-bit EL0
-#endif
+       kernel_ventry   1, t, 64, sync          // Synchronous EL1t
+       kernel_ventry   1, t, 64, irq           // IRQ EL1t
+       kernel_ventry   1, t, 64, fiq           // FIQ EL1t
+       kernel_ventry   1, t, 64, error         // Error EL1t
+
+       kernel_ventry   1, h, 64, sync          // Synchronous EL1h
+       kernel_ventry   1, h, 64, irq           // IRQ EL1h
+       kernel_ventry   1, h, 64, fiq           // FIQ EL1h
+       kernel_ventry   1, h, 64, error         // Error EL1h
+
+       kernel_ventry   0, t, 64, sync          // Synchronous 64-bit EL0
+       kernel_ventry   0, t, 64, irq           // IRQ 64-bit EL0
+       kernel_ventry   0, t, 64, fiq           // FIQ 64-bit EL0
+       kernel_ventry   0, t, 64, error         // Error 64-bit EL0
+
+       kernel_ventry   0, t, 32, sync          // Synchronous 32-bit EL0
+       kernel_ventry   0, t, 32, irq           // IRQ 32-bit EL0
+       kernel_ventry   0, t, 32, fiq           // FIQ 32-bit EL0
+       kernel_ventry   0, t, 32, error         // Error 32-bit EL0
  SYM_CODE_END(vectors)
  
  #ifdef CONFIG_VMAP_STACK
@@ -656,147 +544,46 @@ __bad_stack:
         ASM_BUG()
  #endif /* CONFIG_VMAP_STACK */
  
-/*
- * Invalid mode handlers
- */
-       .macro  inv_entry, el, reason, regsize = 64
+
+       .macro entry_handler el:req, ht:req, regsize:req, label:req
+SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
         kernel_entry \el, \regsize
         mov     x0, sp
-       mov     x1, #\reason
-       mrs     x2, esr_el1
-       bl      bad_mode
-       ASM_BUG()
+       bl      el\el\ht\()_\regsize\()_\label\()_handler
+       .if \el == 0
+       b       ret_to_user
+       .else
+       b       ret_to_kernel
+       .endif
+SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
         .endm
  
-SYM_CODE_START_LOCAL(el0_sync_invalid)
-       inv_entry 0, BAD_SYNC
-SYM_CODE_END(el0_sync_invalid)
-
-SYM_CODE_START_LOCAL(el0_irq_invalid)
-       inv_entry 0, BAD_IRQ
-SYM_CODE_END(el0_irq_invalid)
-
-SYM_CODE_START_LOCAL(el0_fiq_invalid)
-       inv_entry 0, BAD_FIQ
-SYM_CODE_END(el0_fiq_invalid)
-
-SYM_CODE_START_LOCAL(el0_error_invalid)
-       inv_entry 0, BAD_ERROR
-SYM_CODE_END(el0_error_invalid)
-
-SYM_CODE_START_LOCAL(el1_sync_invalid)
-       inv_entry 1, BAD_SYNC
-SYM_CODE_END(el1_sync_invalid)
-
-SYM_CODE_START_LOCAL(el1_irq_invalid)
-       inv_entry 1, BAD_IRQ
-SYM_CODE_END(el1_irq_invalid)
-
-SYM_CODE_START_LOCAL(el1_fiq_invalid)
-       inv_entry 1, BAD_FIQ
-SYM_CODE_END(el1_fiq_invalid)
-
-SYM_CODE_START_LOCAL(el1_error_invalid)
-       inv_entry 1, BAD_ERROR
-SYM_CODE_END(el1_error_invalid)
-
  /*
- * EL1 mode handlers.
+ * Early exception handlers
   */
-       .align  6
-SYM_CODE_START_LOCAL_NOALIGN(el1_sync)
-       kernel_entry 1
-       mov     x0, sp
-       bl      el1_sync_handler
-       kernel_exit 1
-SYM_CODE_END(el1_sync)
-
-       .align  6
-SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
-       kernel_entry 1
-       el1_interrupt_handler handle_arch_irq
-       kernel_exit 1
-SYM_CODE_END(el1_irq)
-
-SYM_CODE_START_LOCAL_NOALIGN(el1_fiq)
-       kernel_entry 1
-       el1_interrupt_handler handle_arch_fiq
-       kernel_exit 1
-SYM_CODE_END(el1_fiq)
-
-/*
- * EL0 mode handlers.
- */
-       .align  6
-SYM_CODE_START_LOCAL_NOALIGN(el0_sync)
-       kernel_entry 0
-       mov     x0, sp
-       bl      el0_sync_handler
-       b       ret_to_user
-SYM_CODE_END(el0_sync)
-
-#ifdef CONFIG_COMPAT
-       .align  6
-SYM_CODE_START_LOCAL_NOALIGN(el0_sync_compat)
-       kernel_entry 0, 32
-       mov     x0, sp
-       bl      el0_sync_compat_handler
-       b       ret_to_user
-SYM_CODE_END(el0_sync_compat)
-
-       .align  6
-SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat)
-       kernel_entry 0, 32
-       b       el0_irq_naked
-SYM_CODE_END(el0_irq_compat)
-
-SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat)
-       kernel_entry 0, 32
-       b       el0_fiq_naked
-SYM_CODE_END(el0_fiq_compat)
-
-SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
-       kernel_entry 0, 32
-       b       el0_error_naked
-SYM_CODE_END(el0_error_compat)
-#endif
-
-       .align  6
-SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
-       kernel_entry 0
-el0_irq_naked:
-       el0_interrupt_handler handle_arch_irq
-       b       ret_to_user
-SYM_CODE_END(el0_irq)
-
-SYM_CODE_START_LOCAL_NOALIGN(el0_fiq)
-       kernel_entry 0
-el0_fiq_naked:
-       el0_interrupt_handler handle_arch_fiq
-       b       ret_to_user
-SYM_CODE_END(el0_fiq)
-
-SYM_CODE_START_LOCAL(el1_error)
-       kernel_entry 1
-       mrs     x1, esr_el1
-       enable_dbg
-       mov     x0, sp
-       bl      do_serror
+       entry_handler   1, t, 64, sync
+       entry_handler   1, t, 64, irq
+       entry_handler   1, t, 64, fiq
+       entry_handler   1, t, 64, error
+
+       entry_handler   1, h, 64, sync
+       entry_handler   1, h, 64, irq
+       entry_handler   1, h, 64, fiq
+       entry_handler   1, h, 64, error
+
+       entry_handler   0, t, 64, sync
+       entry_handler   0, t, 64, irq
+       entry_handler   0, t, 64, fiq
+       entry_handler   0, t, 64, error
+
+       entry_handler   0, t, 32, sync
+       entry_handler   0, t, 32, irq
+       entry_handler   0, t, 32, fiq
+       entry_handler   0, t, 32, error
+
+SYM_CODE_START_LOCAL(ret_to_kernel)
         kernel_exit 1
-SYM_CODE_END(el1_error)
-
-SYM_CODE_START_LOCAL(el0_error)
-       kernel_entry 0
-el0_error_naked:
-       mrs     x25, esr_el1
-       user_exit_irqoff
-       enable_dbg
-       mov     x0, sp
-       mov     x1, x25
-       bl      do_serror
-       enable_da
-       b       ret_to_user
-SYM_CODE_END(el0_error)
+SYM_CODE_END(ret_to_kernel)
  
  /*
   * "slow" syscall return path.
@@ -979,8 +766,8 @@ SYM_FUNC_START(cpu_switch_to)
         mov     sp, x9
         msr     sp_el0, x1
         ptrauth_keys_install_kernel x1, x8, x9, x10
-       scs_save x0, x8
-       scs_load x1, x8
+       scs_save x0
+       scs_load x1
         ret
  SYM_FUNC_END(cpu_switch_to)
  NOKPROBE(cpu_switch_to)
@@ -998,6 +785,42 @@ SYM_CODE_START(ret_from_fork)
  SYM_CODE_END(ret_from_fork)
  NOKPROBE(ret_from_fork)
  
+/*
+ * void call_on_irq_stack(struct pt_regs *regs,
+ *                       void (*func)(struct pt_regs *));
+ *
+ * Calls func(regs) using this CPU's irq stack and shadow irq stack.
+ */
+SYM_FUNC_START(call_on_irq_stack)
+#ifdef CONFIG_SHADOW_CALL_STACK
+       stp     scs_sp, xzr, [sp, #-16]!
+       ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17
+#endif
+       /* Create a frame record to save our LR and SP (implicit in FP) */
+       stp     x29, x30, [sp, #-16]!
+       mov     x29, sp
+
+       ldr_this_cpu x16, irq_stack_ptr, x17
+       mov     x15, #IRQ_STACK_SIZE
+       add     x16, x16, x15
+
+       /* Move to the new stack and call the function there */
+       mov     sp, x16
+       blr     x1
+
+       /*
+        * Restore the SP from the FP, and restore the FP and LR from the frame
+        * record.
+        */
+       mov     sp, x29
+       ldp     x29, x30, [sp], #16
+#ifdef CONFIG_SHADOW_CALL_STACK
+       ldp     scs_sp, xzr, [sp], #16
+#endif
+       ret
+SYM_FUNC_END(call_on_irq_stack)
+NOKPROBE(call_on_irq_stack)
+
  #ifdef CONFIG_ARM_SDE_INTERFACE
  
  #include <asm/sdei.h>
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c

index b5d3dda..7f467bd 100644 (file)
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -15,6 +15,7 @@
  #include <asm/debug-monitors.h>
  #include <asm/ftrace.h>
  #include <asm/insn.h>
+#include <asm/patching.h>
  
  #ifdef CONFIG_DYNAMIC_FTRACE
  /*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S

index 3b88000..c5c994a 100644 (file)
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -16,6 +16,7 @@
  #include <asm/asm_pointer_auth.h>
  #include <asm/assembler.h>
  #include <asm/boot.h>
+#include <asm/bug.h>
  #include <asm/ptrace.h>
  #include <asm/asm-offsets.h>
  #include <asm/cache.h>
@@ -117,8 +118,8 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
         dmb     sy                              // needed before dc ivac with
                                                 // MMU off
  
-       mov     x1, #0x20                       // 4 x 8 bytes
-       b       __inval_dcache_area             // tail call
+       add     x1, x0, #0x20                   // 4 x 8 bytes
+       b       dcache_inval_poc                // tail call
  SYM_CODE_END(preserve_boot_args)
  
  /*
@@ -268,8 +269,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
          */
         adrp    x0, init_pg_dir
         adrp    x1, init_pg_end
-       sub     x1, x1, x0
-       bl      __inval_dcache_area
+       bl      dcache_inval_poc
  
         /*
          * Clear the init page tables.
@@ -381,39 +381,57 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
  
         adrp    x0, idmap_pg_dir
         adrp    x1, idmap_pg_end
-       sub     x1, x1, x0
-       bl      __inval_dcache_area
+       bl      dcache_inval_poc
  
         adrp    x0, init_pg_dir
         adrp    x1, init_pg_end
-       sub     x1, x1, x0
-       bl      __inval_dcache_area
+       bl      dcache_inval_poc
  
         ret     x28
  SYM_FUNC_END(__create_page_tables)
  
+       /*
+        * Initialize CPU registers with task-specific and cpu-specific context.
+        *
+        * Create a final frame record at task_pt_regs(current)->stackframe, so
+        * that the unwinder can identify the final frame record of any task by
+        * its location in the task stack. We reserve the entire pt_regs space
+        * for consistency with user tasks and kthreads.
+        */
+       .macro  init_cpu_task tsk, tmp1, tmp2
+       msr     sp_el0, \tsk
+
+       ldr     \tmp1, [\tsk, #TSK_STACK]
+       add     sp, \tmp1, #THREAD_SIZE
+       sub     sp, sp, #PT_REGS_SIZE
+
+       stp     xzr, xzr, [sp, #S_STACKFRAME]
+       add     x29, sp, #S_STACKFRAME
+
+       scs_load \tsk
+
+       adr_l   \tmp1, __per_cpu_offset
+       ldr     w\tmp2, [\tsk, #TSK_CPU]
+       ldr     \tmp1, [\tmp1, \tmp2, lsl #3]
+       set_this_cpu_offset \tmp1
+       .endm
+
  /*
   * The following fragment of code is executed with the MMU enabled.
   *
   *   x0 = __PHYS_OFFSET
   */
  SYM_FUNC_START_LOCAL(__primary_switched)
-       adrp    x4, init_thread_union
-       add     sp, x4, #THREAD_SIZE
-       adr_l   x5, init_task
-       msr     sp_el0, x5                      // Save thread_info
+       adr_l   x4, init_task
+       init_cpu_task x4, x5, x6
  
         adr_l   x8, vectors                     // load VBAR_EL1 with virtual
         msr     vbar_el1, x8                    // vector table address
         isb
  
-       stp     xzr, x30, [sp, #-16]!
+       stp     x29, x30, [sp, #-16]!
         mov     x29, sp
  
-#ifdef CONFIG_SHADOW_CALL_STACK
-       adr_l   scs_sp, init_shadow_call_stack  // Set shadow call stack
-#endif
-
         str_l   x21, __fdt_pointer, x5          // Save FDT pointer
  
         ldr_l   x4, kimage_vaddr                // Save the offset between
@@ -445,10 +463,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
  0:
  #endif
         bl      switch_to_vhe                   // Prefer VHE if possible
-       add     sp, sp, #16
-       mov     x29, #0
-       mov     x30, #0
-       b       start_kernel
+       ldp     x29, x30, [sp], #16
+       bl      start_kernel
+       ASM_BUG()
  SYM_FUNC_END(__primary_switched)
  
         .pushsection ".rodata", "a"
@@ -631,21 +648,17 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
         isb
  
         adr_l   x0, secondary_data
-       ldr     x1, [x0, #CPU_BOOT_STACK]       // get secondary_data.stack
-       cbz     x1, __secondary_too_slow
-       mov     sp, x1
         ldr     x2, [x0, #CPU_BOOT_TASK]
         cbz     x2, __secondary_too_slow
-       msr     sp_el0, x2
-       scs_load x2, x3
-       mov     x29, #0
-       mov     x30, #0
+
+       init_cpu_task x2, x1, x3
  
  #ifdef CONFIG_ARM64_PTR_AUTH
         ptrauth_keys_init_cpu x2, x3, x4, x5
  #endif
  
-       b       secondary_start_kernel
+       bl      secondary_start_kernel
+       ASM_BUG()
  SYM_FUNC_END(__secondary_switched)
  
  SYM_FUNC_START_LOCAL(__secondary_too_slow)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S

index 8ccca66..81c0186 100644 (file)
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -45,7 +45,7 @@
   * Because this code has to be copied to a 'safe' page, it can't call out to
   * other functions by PC-relative address. Also remember that it may be
   * mid-way through over-writing other functions. For this reason it contains
- * code from flush_icache_range() and uses the copy_page() macro.
+ * code from caches_clean_inval_pou() and uses the copy_page() macro.
   *
   * This 'safe' page is mapped via ttbr0, and executed from there. This function
   * switches to a copy of the linear map in ttbr1, performs the restore, then
@@ -87,11 +87,12 @@ SYM_CODE_START(swsusp_arch_suspend_exit)
         copy_page       x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
  
         add     x1, x10, #PAGE_SIZE
-       /* Clean the copied page to PoU - based on flush_icache_range() */
+       /* Clean the copied page to PoU - based on caches_clean_inval_pou() */
         raw_dcache_line_size x2, x3
         sub     x3, x2, #1
         bic     x4, x10, x3
-2:     dc      cvau, x4        /* clean D line / unified line */
+2:     /* clean D line / unified line */
+alternative_insn "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
         add     x4, x4, x2
         cmp     x4, x1
         b.lo    2b
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c

index b1cef37..46a0b4d 100644 (file)
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
                 return -ENOMEM;
  
         memcpy(page, src_start, length);
-       __flush_icache_range((unsigned long)page, (unsigned long)page + length);
+       caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length);
         rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
         if (rc)
                 return rc;
@@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length,
         return 0;
  }
  
-#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start))
-
  #ifdef CONFIG_ARM64_MTE
  
  static DEFINE_XARRAY(mte_pages);
@@ -383,13 +381,18 @@ int swsusp_arch_suspend(void)
                 ret = swsusp_save();
         } else {
                 /* Clean kernel core startup/idle code to PoC*/
-               dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
-               dcache_clean_range(__idmap_text_start, __idmap_text_end);
+               dcache_clean_inval_poc((unsigned long)__mmuoff_data_start,
+                                   (unsigned long)__mmuoff_data_end);
+               dcache_clean_inval_poc((unsigned long)__idmap_text_start,
+                                   (unsigned long)__idmap_text_end);
  
                 /* Clean kvm setup code to PoC? */
                 if (el2_reset_needed()) {
-                       dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
-                       dcache_clean_range(__hyp_text_start, __hyp_text_end);
+                       dcache_clean_inval_poc(
+                               (unsigned long)__hyp_idmap_text_start,
+                               (unsigned long)__hyp_idmap_text_end);
+                       dcache_clean_inval_poc((unsigned long)__hyp_text_start,
+                                           (unsigned long)__hyp_text_end);
                 }
  
                 swsusp_mte_restore_tags();
@@ -474,7 +477,8 @@ int swsusp_arch_resume(void)
          * The hibernate exit text contains a set of el2 vectors, that will
          * be executed at el2 with the mmu off in order to reload hyp-stub.
          */
-       __flush_dcache_area(hibernate_exit, exit_size);
+       dcache_clean_inval_poc((unsigned long)hibernate_exit,
+                           (unsigned long)hibernate_exit + exit_size);
  
         /*
          * KASLR will cause the el2 vectors to be in a different location in
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c

new file mode 100644 (file)

index 0000000..a2cfbac
--- /dev/null
+++ b/arch/arm64/kernel/idle.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Low-level idle sequences
+ */
+
+#include <linux/cpu.h>
+#include <linux/irqflags.h>
+
+#include <asm/barrier.h>
+#include <asm/cpuidle.h>
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+/*
+ *     cpu_do_idle()
+ *
+ *     Idle the processor (wait for interrupt).
+ *
+ *     If the CPU supports priority masking we must do additional work to
+ *     ensure that interrupts are not masked at the PMR (because the core will
+ *     not wake up if we block the wake up signal in the interrupt controller).
+ */
+void noinstr cpu_do_idle(void)
+{
+       struct arm_cpuidle_irq_context context;
+
+       arm_cpuidle_save_irq_context(&context);
+
+       dsb(sy);
+       wfi();
+
+       arm_cpuidle_restore_irq_context(&context);
+}
+
+/*
+ * This is our default idle handler.
+ */
+void noinstr arch_cpu_idle(void)
+{
+       /*
+        * This should do all the clock switching and wait for interrupt
+        * tricks
+        */
+       cpu_do_idle();
+       raw_local_irq_enable();
+}
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c

index e628c8c..53a381a 100644 (file)
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void)
  
         for (i = 0; i < ARRAY_SIZE(regs); i++) {
                 if (regs[i]->override)
-                       __flush_dcache_area(regs[i]->override,
+                       dcache_clean_inval_poc((unsigned long)regs[i]->override,
+                                           (unsigned long)regs[i]->override +
                                             sizeof(*regs[i]->override));
         }
  }
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h

index bcf3c27..c96a9a0 100644 (file)
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -35,7 +35,7 @@ __efistub_strnlen             = __pi_strnlen;
  __efistub_strcmp               = __pi_strcmp;
  __efistub_strncmp              = __pi_strncmp;
  __efistub_strrchr              = __pi_strrchr;
-__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc;
+__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
  
  #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
  __efistub___memcpy             = __pi_memcpy;
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c

deleted file mode 100644 (file)

index 6c0de2f..0000000
--- a/arch/arm64/kernel/insn.c
+++ /dev/null
@@ -1,1699 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2013 Huawei Ltd.
- * Author: Jiang Liu <liuj97@gmail.com>
- *
- * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
- */
-#include <linux/bitops.h>
-#include <linux/bug.h>
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/stop_machine.h>
-#include <linux/types.h>
-#include <linux/uaccess.h>
-
-#include <asm/cacheflush.h>
-#include <asm/debug-monitors.h>
-#include <asm/fixmap.h>
-#include <asm/insn.h>
-#include <asm/kprobes.h>
-#include <asm/sections.h>
-
-#define AARCH64_INSN_SF_BIT    BIT(31)
-#define AARCH64_INSN_N_BIT     BIT(22)
-#define AARCH64_INSN_LSL_12    BIT(22)
-
-static const int aarch64_insn_encoding_class[] = {
-       AARCH64_INSN_CLS_UNKNOWN,
-       AARCH64_INSN_CLS_UNKNOWN,
-       AARCH64_INSN_CLS_UNKNOWN,
-       AARCH64_INSN_CLS_UNKNOWN,
-       AARCH64_INSN_CLS_LDST,
-       AARCH64_INSN_CLS_DP_REG,
-       AARCH64_INSN_CLS_LDST,
-       AARCH64_INSN_CLS_DP_FPSIMD,
-       AARCH64_INSN_CLS_DP_IMM,
-       AARCH64_INSN_CLS_DP_IMM,
-       AARCH64_INSN_CLS_BR_SYS,
-       AARCH64_INSN_CLS_BR_SYS,
-       AARCH64_INSN_CLS_LDST,
-       AARCH64_INSN_CLS_DP_REG,
-       AARCH64_INSN_CLS_LDST,
-       AARCH64_INSN_CLS_DP_FPSIMD,
-};
-
-enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn)
-{
-       return aarch64_insn_encoding_class[(insn >> 25) & 0xf];
-}
-
-bool __kprobes aarch64_insn_is_steppable_hint(u32 insn)
-{
-       if (!aarch64_insn_is_hint(insn))
-               return false;
-
-       switch (insn & 0xFE0) {
-       case AARCH64_INSN_HINT_XPACLRI:
-       case AARCH64_INSN_HINT_PACIA_1716:
-       case AARCH64_INSN_HINT_PACIB_1716:
-       case AARCH64_INSN_HINT_PACIAZ:
-       case AARCH64_INSN_HINT_PACIASP:
-       case AARCH64_INSN_HINT_PACIBZ:
-       case AARCH64_INSN_HINT_PACIBSP:
-       case AARCH64_INSN_HINT_BTI:
-       case AARCH64_INSN_HINT_BTIC:
-       case AARCH64_INSN_HINT_BTIJ:
-       case AARCH64_INSN_HINT_BTIJC:
-       case AARCH64_INSN_HINT_NOP:
-               return true;
-       default:
-               return false;
-       }
-}
-
-bool aarch64_insn_is_branch_imm(u32 insn)
-{
-       return (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn) ||
-               aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn) ||
-               aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
-               aarch64_insn_is_bcond(insn));
-}
-
-static DEFINE_RAW_SPINLOCK(patch_lock);
-
-static bool is_exit_text(unsigned long addr)
-{
-       /* discarded with init text/data */
-       return system_state < SYSTEM_RUNNING &&
-               addr >= (unsigned long)__exittext_begin &&
-               addr < (unsigned long)__exittext_end;
-}
-
-static bool is_image_text(unsigned long addr)
-{
-       return core_kernel_text(addr) || is_exit_text(addr);
-}
-
-static void __kprobes *patch_map(void *addr, int fixmap)
-{
-       unsigned long uintaddr = (uintptr_t) addr;
-       bool image = is_image_text(uintaddr);
-       struct page *page;
-
-       if (image)
-               page = phys_to_page(__pa_symbol(addr));
-       else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
-               page = vmalloc_to_page(addr);
-       else
-               return addr;
-
-       BUG_ON(!page);
-       return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
-                       (uintaddr & ~PAGE_MASK));
-}
-
-static void __kprobes patch_unmap(int fixmap)
-{
-       clear_fixmap(fixmap);
-}
-/*
- * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
- * little-endian.
- */
-int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
-{
-       int ret;
-       __le32 val;
-
-       ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE);
-       if (!ret)
-               *insnp = le32_to_cpu(val);
-
-       return ret;
-}
-
-static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
-{
-       void *waddr = addr;
-       unsigned long flags = 0;
-       int ret;
-
-       raw_spin_lock_irqsave(&patch_lock, flags);
-       waddr = patch_map(addr, FIX_TEXT_POKE0);
-
-       ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
-
-       patch_unmap(FIX_TEXT_POKE0);
-       raw_spin_unlock_irqrestore(&patch_lock, flags);
-
-       return ret;
-}
-
-int __kprobes aarch64_insn_write(void *addr, u32 insn)
-{
-       return __aarch64_insn_write(addr, cpu_to_le32(insn));
-}
-
-bool __kprobes aarch64_insn_uses_literal(u32 insn)
-{
-       /* ldr/ldrsw (literal), prfm */
-
-       return aarch64_insn_is_ldr_lit(insn) ||
-               aarch64_insn_is_ldrsw_lit(insn) ||
-               aarch64_insn_is_adr_adrp(insn) ||
-               aarch64_insn_is_prfm_lit(insn);
-}
-
-bool __kprobes aarch64_insn_is_branch(u32 insn)
-{
-       /* b, bl, cb*, tb*, ret*, b.cond, br*, blr* */
-
-       return aarch64_insn_is_b(insn) ||
-               aarch64_insn_is_bl(insn) ||
-               aarch64_insn_is_cbz(insn) ||
-               aarch64_insn_is_cbnz(insn) ||
-               aarch64_insn_is_tbz(insn) ||
-               aarch64_insn_is_tbnz(insn) ||
-               aarch64_insn_is_ret(insn) ||
-               aarch64_insn_is_ret_auth(insn) ||
-               aarch64_insn_is_br(insn) ||
-               aarch64_insn_is_br_auth(insn) ||
-               aarch64_insn_is_blr(insn) ||
-               aarch64_insn_is_blr_auth(insn) ||
-               aarch64_insn_is_bcond(insn);
-}
-
-int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
-{
-       u32 *tp = addr;
-       int ret;
-
-       /* A64 instructions must be word aligned */
-       if ((uintptr_t)tp & 0x3)
-               return -EINVAL;
-
-       ret = aarch64_insn_write(tp, insn);
-       if (ret == 0)
-               __flush_icache_range((uintptr_t)tp,
-                                    (uintptr_t)tp + AARCH64_INSN_SIZE);
-
-       return ret;
-}
-
-struct aarch64_insn_patch {
-       void            **text_addrs;
-       u32             *new_insns;
-       int             insn_cnt;
-       atomic_t        cpu_count;
-};
-
-static int __kprobes aarch64_insn_patch_text_cb(void *arg)
-{
-       int i, ret = 0;
-       struct aarch64_insn_patch *pp = arg;
-
-       /* The first CPU becomes master */
-       if (atomic_inc_return(&pp->cpu_count) == 1) {
-               for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
-                       ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
-                                                            pp->new_insns[i]);
-               /* Notify other processors with an additional increment. */
-               atomic_inc(&pp->cpu_count);
-       } else {
-               while (atomic_read(&pp->cpu_count) <= num_online_cpus())
-                       cpu_relax();
-               isb();
-       }
-
-       return ret;
-}
-
-int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
-{
-       struct aarch64_insn_patch patch = {
-               .text_addrs = addrs,
-               .new_insns = insns,
-               .insn_cnt = cnt,
-               .cpu_count = ATOMIC_INIT(0),
-       };
-
-       if (cnt <= 0)
-               return -EINVAL;
-
-       return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
-                                      cpu_online_mask);
-}
-
-static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
-                                               u32 *maskp, int *shiftp)
-{
-       u32 mask;
-       int shift;
-
-       switch (type) {
-       case AARCH64_INSN_IMM_26:
-               mask = BIT(26) - 1;
-               shift = 0;
-               break;
-       case AARCH64_INSN_IMM_19:
-               mask = BIT(19) - 1;
-               shift = 5;
-               break;
-       case AARCH64_INSN_IMM_16:
-               mask = BIT(16) - 1;
-               shift = 5;
-               break;
-       case AARCH64_INSN_IMM_14:
-               mask = BIT(14) - 1;
-               shift = 5;
-               break;
-       case AARCH64_INSN_IMM_12:
-               mask = BIT(12) - 1;
-               shift = 10;
-               break;
-       case AARCH64_INSN_IMM_9:
-               mask = BIT(9) - 1;
-               shift = 12;
-               break;
-       case AARCH64_INSN_IMM_7:
-               mask = BIT(7) - 1;
-               shift = 15;
-               break;
-       case AARCH64_INSN_IMM_6:
-       case AARCH64_INSN_IMM_S:
-               mask = BIT(6) - 1;
-               shift = 10;
-               break;
-       case AARCH64_INSN_IMM_R:
-               mask = BIT(6) - 1;
-               shift = 16;
-               break;
-       case AARCH64_INSN_IMM_N:
-               mask = 1;
-               shift = 22;
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       *maskp = mask;
-       *shiftp = shift;
-
-       return 0;
-}
-
-#define ADR_IMM_HILOSPLIT      2
-#define ADR_IMM_SIZE           SZ_2M
-#define ADR_IMM_LOMASK         ((1 << ADR_IMM_HILOSPLIT) - 1)
-#define ADR_IMM_HIMASK         ((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
-#define ADR_IMM_LOSHIFT                29
-#define ADR_IMM_HISHIFT                5
-
-u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn)
-{
-       u32 immlo, immhi, mask;
-       int shift;
-
-       switch (type) {
-       case AARCH64_INSN_IMM_ADR:
-               shift = 0;
-               immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK;
-               immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK;
-               insn = (immhi << ADR_IMM_HILOSPLIT) | immlo;
-               mask = ADR_IMM_SIZE - 1;
-               break;
-       default:
-               if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
-                       pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n",
-                              type);
-                       return 0;
-               }
-       }
-
-       return (insn >> shift) & mask;
-}
-
-u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
-                                 u32 insn, u64 imm)
-{
-       u32 immlo, immhi, mask;
-       int shift;
-
-       if (insn == AARCH64_BREAK_FAULT)
-               return AARCH64_BREAK_FAULT;
-
-       switch (type) {
-       case AARCH64_INSN_IMM_ADR:
-               shift = 0;
-               immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
-               imm >>= ADR_IMM_HILOSPLIT;
-               immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT;
-               imm = immlo | immhi;
-               mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
-                       (ADR_IMM_HIMASK << ADR_IMM_HISHIFT));
-               break;
-       default:
-               if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
-                       pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
-                              type);
-                       return AARCH64_BREAK_FAULT;
-               }
-       }
-
-       /* Update the immediate field. */
-       insn &= ~(mask << shift);
-       insn |= (imm & mask) << shift;
-
-       return insn;
-}
-
-u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
-                                       u32 insn)
-{
-       int shift;
-
-       switch (type) {
-       case AARCH64_INSN_REGTYPE_RT:
-       case AARCH64_INSN_REGTYPE_RD:
-               shift = 0;
-               break;
-       case AARCH64_INSN_REGTYPE_RN:
-               shift = 5;
-               break;
-       case AARCH64_INSN_REGTYPE_RT2:
-       case AARCH64_INSN_REGTYPE_RA:
-               shift = 10;
-               break;
-       case AARCH64_INSN_REGTYPE_RM:
-               shift = 16;
-               break;
-       default:
-               pr_err("%s: unknown register type encoding %d\n", __func__,
-                      type);
-               return 0;
-       }
-
-       return (insn >> shift) & GENMASK(4, 0);
-}
-
-static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
-                                       u32 insn,
-                                       enum aarch64_insn_register reg)
-{
-       int shift;
-
-       if (insn == AARCH64_BREAK_FAULT)
-               return AARCH64_BREAK_FAULT;
-
-       if (reg < AARCH64_INSN_REG_0 || reg > AARCH64_INSN_REG_SP) {
-               pr_err("%s: unknown register encoding %d\n", __func__, reg);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (type) {
-       case AARCH64_INSN_REGTYPE_RT:
-       case AARCH64_INSN_REGTYPE_RD:
-               shift = 0;
-               break;
-       case AARCH64_INSN_REGTYPE_RN:
-               shift = 5;
-               break;
-       case AARCH64_INSN_REGTYPE_RT2:
-       case AARCH64_INSN_REGTYPE_RA:
-               shift = 10;
-               break;
-       case AARCH64_INSN_REGTYPE_RM:
-       case AARCH64_INSN_REGTYPE_RS:
-               shift = 16;
-               break;
-       default:
-               pr_err("%s: unknown register type encoding %d\n", __func__,
-                      type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn &= ~(GENMASK(4, 0) << shift);
-       insn |= reg << shift;
-
-       return insn;
-}
-
-static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
-                                        u32 insn)
-{
-       u32 size;
-
-       switch (type) {
-       case AARCH64_INSN_SIZE_8:
-               size = 0;
-               break;
-       case AARCH64_INSN_SIZE_16:
-               size = 1;
-               break;
-       case AARCH64_INSN_SIZE_32:
-               size = 2;
-               break;
-       case AARCH64_INSN_SIZE_64:
-               size = 3;
-               break;
-       default:
-               pr_err("%s: unknown size encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn &= ~GENMASK(31, 30);
-       insn |= size << 30;
-
-       return insn;
-}
-
-static inline long branch_imm_common(unsigned long pc, unsigned long addr,
-                                    long range)
-{
-       long offset;
-
-       if ((pc & 0x3) || (addr & 0x3)) {
-               pr_err("%s: A64 instructions must be word aligned\n", __func__);
-               return range;
-       }
-
-       offset = ((long)addr - (long)pc);
-
-       if (offset < -range || offset >= range) {
-               pr_err("%s: offset out of range\n", __func__);
-               return range;
-       }
-
-       return offset;
-}
-
-u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
-                                         enum aarch64_insn_branch_type type)
-{
-       u32 insn;
-       long offset;
-
-       /*
-        * B/BL support [-128M, 128M) offset
-        * ARM64 virtual address arrangement guarantees all kernel and module
-        * texts are within +/-128M.
-        */
-       offset = branch_imm_common(pc, addr, SZ_128M);
-       if (offset >= SZ_128M)
-               return AARCH64_BREAK_FAULT;
-
-       switch (type) {
-       case AARCH64_INSN_BRANCH_LINK:
-               insn = aarch64_insn_get_bl_value();
-               break;
-       case AARCH64_INSN_BRANCH_NOLINK:
-               insn = aarch64_insn_get_b_value();
-               break;
-       default:
-               pr_err("%s: unknown branch encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn,
-                                            offset >> 2);
-}
-
-u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
-                                    enum aarch64_insn_register reg,
-                                    enum aarch64_insn_variant variant,
-                                    enum aarch64_insn_branch_type type)
-{
-       u32 insn;
-       long offset;
-
-       offset = branch_imm_common(pc, addr, SZ_1M);
-       if (offset >= SZ_1M)
-               return AARCH64_BREAK_FAULT;
-
-       switch (type) {
-       case AARCH64_INSN_BRANCH_COMP_ZERO:
-               insn = aarch64_insn_get_cbz_value();
-               break;
-       case AARCH64_INSN_BRANCH_COMP_NONZERO:
-               insn = aarch64_insn_get_cbnz_value();
-               break;
-       default:
-               pr_err("%s: unknown branch encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
-                                            offset >> 2);
-}
-
-u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
-                                    enum aarch64_insn_condition cond)
-{
-       u32 insn;
-       long offset;
-
-       offset = branch_imm_common(pc, addr, SZ_1M);
-
-       insn = aarch64_insn_get_bcond_value();
-
-       if (cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL) {
-               pr_err("%s: unknown condition encoding %d\n", __func__, cond);
-               return AARCH64_BREAK_FAULT;
-       }
-       insn |= cond;
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
-                                            offset >> 2);
-}
-
-u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op)
-{
-       return aarch64_insn_get_hint_value() | op;
-}
-
-u32 __kprobes aarch64_insn_gen_nop(void)
-{
-       return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
-}
-
-u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
-                               enum aarch64_insn_branch_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_BRANCH_NOLINK:
-               insn = aarch64_insn_get_br_value();
-               break;
-       case AARCH64_INSN_BRANCH_LINK:
-               insn = aarch64_insn_get_blr_value();
-               break;
-       case AARCH64_INSN_BRANCH_RETURN:
-               insn = aarch64_insn_get_ret_value();
-               break;
-       default:
-               pr_err("%s: unknown branch encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, reg);
-}
-
-u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
-                                   enum aarch64_insn_register base,
-                                   enum aarch64_insn_register offset,
-                                   enum aarch64_insn_size_type size,
-                                   enum aarch64_insn_ldst_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_LDST_LOAD_REG_OFFSET:
-               insn = aarch64_insn_get_ldr_reg_value();
-               break;
-       case AARCH64_INSN_LDST_STORE_REG_OFFSET:
-               insn = aarch64_insn_get_str_reg_value();
-               break;
-       default:
-               pr_err("%s: unknown load/store encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_ldst_size(size, insn);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
-                                           base);
-
-       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn,
-                                           offset);
-}
-
-u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
-                                    enum aarch64_insn_register reg2,
-                                    enum aarch64_insn_register base,
-                                    int offset,
-                                    enum aarch64_insn_variant variant,
-                                    enum aarch64_insn_ldst_type type)
-{
-       u32 insn;
-       int shift;
-
-       switch (type) {
-       case AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX:
-               insn = aarch64_insn_get_ldp_pre_value();
-               break;
-       case AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX:
-               insn = aarch64_insn_get_stp_pre_value();
-               break;
-       case AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX:
-               insn = aarch64_insn_get_ldp_post_value();
-               break;
-       case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX:
-               insn = aarch64_insn_get_stp_post_value();
-               break;
-       default:
-               pr_err("%s: unknown load/store encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               if ((offset & 0x3) || (offset < -256) || (offset > 252)) {
-                       pr_err("%s: offset must be multiples of 4 in the range of [-256, 252] %d\n",
-                              __func__, offset);
-                       return AARCH64_BREAK_FAULT;
-               }
-               shift = 2;
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               if ((offset & 0x7) || (offset < -512) || (offset > 504)) {
-                       pr_err("%s: offset must be multiples of 8 in the range of [-512, 504] %d\n",
-                              __func__, offset);
-                       return AARCH64_BREAK_FAULT;
-               }
-               shift = 3;
-               insn |= AARCH64_INSN_SF_BIT;
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
-                                           reg1);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
-                                           reg2);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
-                                           base);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_7, insn,
-                                            offset >> shift);
-}
-
-u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
-                                  enum aarch64_insn_register base,
-                                  enum aarch64_insn_register state,
-                                  enum aarch64_insn_size_type size,
-                                  enum aarch64_insn_ldst_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_LDST_LOAD_EX:
-               insn = aarch64_insn_get_load_ex_value();
-               break;
-       case AARCH64_INSN_LDST_STORE_EX:
-               insn = aarch64_insn_get_store_ex_value();
-               break;
-       default:
-               pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_ldst_size(size, insn);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
-                                           reg);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
-                                           base);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, insn,
-                                           AARCH64_INSN_REG_ZR);
-
-       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
-                                           state);
-}
-
-u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
-                          enum aarch64_insn_register address,
-                          enum aarch64_insn_register value,
-                          enum aarch64_insn_size_type size)
-{
-       u32 insn = aarch64_insn_get_ldadd_value();
-
-       switch (size) {
-       case AARCH64_INSN_SIZE_32:
-       case AARCH64_INSN_SIZE_64:
-               break;
-       default:
-               pr_err("%s: unimplemented size encoding %d\n", __func__, size);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_ldst_size(size, insn);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
-                                           result);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
-                                           address);
-
-       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
-                                           value);
-}
-
-u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
-                          enum aarch64_insn_register value,
-                          enum aarch64_insn_size_type size)
-{
-       /*
-        * STADD is simply encoded as an alias for LDADD with XZR as
-        * the destination register.
-        */
-       return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
-                                     value, size);
-}
-
-static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
-                                       enum aarch64_insn_prfm_target target,
-                                       enum aarch64_insn_prfm_policy policy,
-                                       u32 insn)
-{
-       u32 imm_type = 0, imm_target = 0, imm_policy = 0;
-
-       switch (type) {
-       case AARCH64_INSN_PRFM_TYPE_PLD:
-               break;
-       case AARCH64_INSN_PRFM_TYPE_PLI:
-               imm_type = BIT(0);
-               break;
-       case AARCH64_INSN_PRFM_TYPE_PST:
-               imm_type = BIT(1);
-               break;
-       default:
-               pr_err("%s: unknown prfm type encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (target) {
-       case AARCH64_INSN_PRFM_TARGET_L1:
-               break;
-       case AARCH64_INSN_PRFM_TARGET_L2:
-               imm_target = BIT(0);
-               break;
-       case AARCH64_INSN_PRFM_TARGET_L3:
-               imm_target = BIT(1);
-               break;
-       default:
-               pr_err("%s: unknown prfm target encoding %d\n", __func__, target);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (policy) {
-       case AARCH64_INSN_PRFM_POLICY_KEEP:
-               break;
-       case AARCH64_INSN_PRFM_POLICY_STRM:
-               imm_policy = BIT(0);
-               break;
-       default:
-               pr_err("%s: unknown prfm policy encoding %d\n", __func__, policy);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       /* In this case, imm5 is encoded into Rt field. */
-       insn &= ~GENMASK(4, 0);
-       insn |= imm_policy | (imm_target << 1) | (imm_type << 3);
-
-       return insn;
-}
-
-u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
-                             enum aarch64_insn_prfm_type type,
-                             enum aarch64_insn_prfm_target target,
-                             enum aarch64_insn_prfm_policy policy)
-{
-       u32 insn = aarch64_insn_get_prfm_value();
-
-       insn = aarch64_insn_encode_ldst_size(AARCH64_INSN_SIZE_64, insn);
-
-       insn = aarch64_insn_encode_prfm_imm(type, target, policy, insn);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
-                                           base);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, 0);
-}
-
-u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
-                                enum aarch64_insn_register src,
-                                int imm, enum aarch64_insn_variant variant,
-                                enum aarch64_insn_adsb_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_ADSB_ADD:
-               insn = aarch64_insn_get_add_imm_value();
-               break;
-       case AARCH64_INSN_ADSB_SUB:
-               insn = aarch64_insn_get_sub_imm_value();
-               break;
-       case AARCH64_INSN_ADSB_ADD_SETFLAGS:
-               insn = aarch64_insn_get_adds_imm_value();
-               break;
-       case AARCH64_INSN_ADSB_SUB_SETFLAGS:
-               insn = aarch64_insn_get_subs_imm_value();
-               break;
-       default:
-               pr_err("%s: unknown add/sub encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       /* We can't encode more than a 24bit value (12bit + 12bit shift) */
-       if (imm & ~(BIT(24) - 1))
-               goto out;
-
-       /* If we have something in the top 12 bits... */
-       if (imm & ~(SZ_4K - 1)) {
-               /* ... and in the low 12 bits -> error */
-               if (imm & (SZ_4K - 1))
-                       goto out;
-
-               imm >>= 12;
-               insn |= AARCH64_INSN_LSL_12;
-       }
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
-
-out:
-       pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
-       return AARCH64_BREAK_FAULT;
-}
-
-u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
-                             enum aarch64_insn_register src,
-                             int immr, int imms,
-                             enum aarch64_insn_variant variant,
-                             enum aarch64_insn_bitfield_type type)
-{
-       u32 insn;
-       u32 mask;
-
-       switch (type) {
-       case AARCH64_INSN_BITFIELD_MOVE:
-               insn = aarch64_insn_get_bfm_value();
-               break;
-       case AARCH64_INSN_BITFIELD_MOVE_UNSIGNED:
-               insn = aarch64_insn_get_ubfm_value();
-               break;
-       case AARCH64_INSN_BITFIELD_MOVE_SIGNED:
-               insn = aarch64_insn_get_sbfm_value();
-               break;
-       default:
-               pr_err("%s: unknown bitfield encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               mask = GENMASK(4, 0);
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT | AARCH64_INSN_N_BIT;
-               mask = GENMASK(5, 0);
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       if (immr & ~mask) {
-               pr_err("%s: invalid immr encoding %d\n", __func__, immr);
-               return AARCH64_BREAK_FAULT;
-       }
-       if (imms & ~mask) {
-               pr_err("%s: invalid imms encoding %d\n", __func__, imms);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
-       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
-}
-
-u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
-                             int imm, int shift,
-                             enum aarch64_insn_variant variant,
-                             enum aarch64_insn_movewide_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_MOVEWIDE_ZERO:
-               insn = aarch64_insn_get_movz_value();
-               break;
-       case AARCH64_INSN_MOVEWIDE_KEEP:
-               insn = aarch64_insn_get_movk_value();
-               break;
-       case AARCH64_INSN_MOVEWIDE_INVERSE:
-               insn = aarch64_insn_get_movn_value();
-               break;
-       default:
-               pr_err("%s: unknown movewide encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       if (imm & ~(SZ_64K - 1)) {
-               pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               if (shift != 0 && shift != 16) {
-                       pr_err("%s: invalid shift encoding %d\n", __func__,
-                              shift);
-                       return AARCH64_BREAK_FAULT;
-               }
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               if (shift != 0 && shift != 16 && shift != 32 && shift != 48) {
-                       pr_err("%s: invalid shift encoding %d\n", __func__,
-                              shift);
-                       return AARCH64_BREAK_FAULT;
-               }
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn |= (shift >> 4) << 21;
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
-}
-
-u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
-                                        enum aarch64_insn_register src,
-                                        enum aarch64_insn_register reg,
-                                        int shift,
-                                        enum aarch64_insn_variant variant,
-                                        enum aarch64_insn_adsb_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_ADSB_ADD:
-               insn = aarch64_insn_get_add_value();
-               break;
-       case AARCH64_INSN_ADSB_SUB:
-               insn = aarch64_insn_get_sub_value();
-               break;
-       case AARCH64_INSN_ADSB_ADD_SETFLAGS:
-               insn = aarch64_insn_get_adds_value();
-               break;
-       case AARCH64_INSN_ADSB_SUB_SETFLAGS:
-               insn = aarch64_insn_get_subs_value();
-               break;
-       default:
-               pr_err("%s: unknown add/sub encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               if (shift & ~(SZ_32 - 1)) {
-                       pr_err("%s: invalid shift encoding %d\n", __func__,
-                              shift);
-                       return AARCH64_BREAK_FAULT;
-               }
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               if (shift & ~(SZ_64 - 1)) {
-                       pr_err("%s: invalid shift encoding %d\n", __func__,
-                              shift);
-                       return AARCH64_BREAK_FAULT;
-               }
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
-}
-
-u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
-                          enum aarch64_insn_register src,
-                          enum aarch64_insn_variant variant,
-                          enum aarch64_insn_data1_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_DATA1_REVERSE_16:
-               insn = aarch64_insn_get_rev16_value();
-               break;
-       case AARCH64_INSN_DATA1_REVERSE_32:
-               insn = aarch64_insn_get_rev32_value();
-               break;
-       case AARCH64_INSN_DATA1_REVERSE_64:
-               if (variant != AARCH64_INSN_VARIANT_64BIT) {
-                       pr_err("%s: invalid variant for reverse64 %d\n",
-                              __func__, variant);
-                       return AARCH64_BREAK_FAULT;
-               }
-               insn = aarch64_insn_get_rev64_value();
-               break;
-       default:
-               pr_err("%s: unknown data1 encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
-       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-}
-
-u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
-                          enum aarch64_insn_register src,
-                          enum aarch64_insn_register reg,
-                          enum aarch64_insn_variant variant,
-                          enum aarch64_insn_data2_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_DATA2_UDIV:
-               insn = aarch64_insn_get_udiv_value();
-               break;
-       case AARCH64_INSN_DATA2_SDIV:
-               insn = aarch64_insn_get_sdiv_value();
-               break;
-       case AARCH64_INSN_DATA2_LSLV:
-               insn = aarch64_insn_get_lslv_value();
-               break;
-       case AARCH64_INSN_DATA2_LSRV:
-               insn = aarch64_insn_get_lsrv_value();
-               break;
-       case AARCH64_INSN_DATA2_ASRV:
-               insn = aarch64_insn_get_asrv_value();
-               break;
-       case AARCH64_INSN_DATA2_RORV:
-               insn = aarch64_insn_get_rorv_value();
-               break;
-       default:
-               pr_err("%s: unknown data2 encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
-       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
-}
-
-u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
-                          enum aarch64_insn_register src,
-                          enum aarch64_insn_register reg1,
-                          enum aarch64_insn_register reg2,
-                          enum aarch64_insn_variant variant,
-                          enum aarch64_insn_data3_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_DATA3_MADD:
-               insn = aarch64_insn_get_madd_value();
-               break;
-       case AARCH64_INSN_DATA3_MSUB:
-               insn = aarch64_insn_get_msub_value();
-               break;
-       default:
-               pr_err("%s: unknown data3 encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RA, insn, src);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
-                                           reg1);
-
-       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn,
-                                           reg2);
-}
-
-u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
-                                        enum aarch64_insn_register src,
-                                        enum aarch64_insn_register reg,
-                                        int shift,
-                                        enum aarch64_insn_variant variant,
-                                        enum aarch64_insn_logic_type type)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_LOGIC_AND:
-               insn = aarch64_insn_get_and_value();
-               break;
-       case AARCH64_INSN_LOGIC_BIC:
-               insn = aarch64_insn_get_bic_value();
-               break;
-       case AARCH64_INSN_LOGIC_ORR:
-               insn = aarch64_insn_get_orr_value();
-               break;
-       case AARCH64_INSN_LOGIC_ORN:
-               insn = aarch64_insn_get_orn_value();
-               break;
-       case AARCH64_INSN_LOGIC_EOR:
-               insn = aarch64_insn_get_eor_value();
-               break;
-       case AARCH64_INSN_LOGIC_EON:
-               insn = aarch64_insn_get_eon_value();
-               break;
-       case AARCH64_INSN_LOGIC_AND_SETFLAGS:
-               insn = aarch64_insn_get_ands_value();
-               break;
-       case AARCH64_INSN_LOGIC_BIC_SETFLAGS:
-               insn = aarch64_insn_get_bics_value();
-               break;
-       default:
-               pr_err("%s: unknown logical encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               if (shift & ~(SZ_32 - 1)) {
-                       pr_err("%s: invalid shift encoding %d\n", __func__,
-                              shift);
-                       return AARCH64_BREAK_FAULT;
-               }
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               if (shift & ~(SZ_64 - 1)) {
-                       pr_err("%s: invalid shift encoding %d\n", __func__,
-                              shift);
-                       return AARCH64_BREAK_FAULT;
-               }
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
-}
-
-/*
- * MOV (register) is architecturally an alias of ORR (shifted register) where
- * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m>
- */
-u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
-                             enum aarch64_insn_register src,
-                             enum aarch64_insn_variant variant)
-{
-       return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR,
-                                                   src, 0, variant,
-                                                   AARCH64_INSN_LOGIC_ORR);
-}
-
-u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
-                        enum aarch64_insn_register reg,
-                        enum aarch64_insn_adr_type type)
-{
-       u32 insn;
-       s32 offset;
-
-       switch (type) {
-       case AARCH64_INSN_ADR_TYPE_ADR:
-               insn = aarch64_insn_get_adr_value();
-               offset = addr - pc;
-               break;
-       case AARCH64_INSN_ADR_TYPE_ADRP:
-               insn = aarch64_insn_get_adrp_value();
-               offset = (addr - ALIGN_DOWN(pc, SZ_4K)) >> 12;
-               break;
-       default:
-               pr_err("%s: unknown adr encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       if (offset < -SZ_1M || offset >= SZ_1M)
-               return AARCH64_BREAK_FAULT;
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, reg);
-
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn, offset);
-}
-
-/*
- * Decode the imm field of a branch, and return the byte offset as a
- * signed value (so it can be used when computing a new branch
- * target).
- */
-s32 aarch64_get_branch_offset(u32 insn)
-{
-       s32 imm;
-
-       if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
-               imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn);
-               return (imm << 6) >> 4;
-       }
-
-       if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
-           aarch64_insn_is_bcond(insn)) {
-               imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_19, insn);
-               return (imm << 13) >> 11;
-       }
-
-       if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn)) {
-               imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_14, insn);
-               return (imm << 18) >> 16;
-       }
-
-       /* Unhandled instruction */
-       BUG();
-}
-
-/*
- * Encode the displacement of a branch in the imm field and return the
- * updated instruction.
- */
-u32 aarch64_set_branch_offset(u32 insn, s32 offset)
-{
-       if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn))
-               return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn,
-                                                    offset >> 2);
-
-       if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
-           aarch64_insn_is_bcond(insn))
-               return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
-                                                    offset >> 2);
-
-       if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn))
-               return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_14, insn,
-                                                    offset >> 2);
-
-       /* Unhandled instruction */
-       BUG();
-}
-
-s32 aarch64_insn_adrp_get_offset(u32 insn)
-{
-       BUG_ON(!aarch64_insn_is_adrp(insn));
-       return aarch64_insn_decode_immediate(AARCH64_INSN_IMM_ADR, insn) << 12;
-}
-
-u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset)
-{
-       BUG_ON(!aarch64_insn_is_adrp(insn));
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn,
-                                               offset >> 12);
-}
-
-/*
- * Extract the Op/CR data from a msr/mrs instruction.
- */
-u32 aarch64_insn_extract_system_reg(u32 insn)
-{
-       return (insn & 0x1FFFE0) >> 5;
-}
-
-bool aarch32_insn_is_wide(u32 insn)
-{
-       return insn >= 0xe800;
-}
-
-/*
- * Macros/defines for extracting register numbers from instruction.
- */
-u32 aarch32_insn_extract_reg_num(u32 insn, int offset)
-{
-       return (insn & (0xf << offset)) >> offset;
-}
-
-#define OPC2_MASK      0x7
-#define OPC2_OFFSET    5
-u32 aarch32_insn_mcr_extract_opc2(u32 insn)
-{
-       return (insn & (OPC2_MASK << OPC2_OFFSET)) >> OPC2_OFFSET;
-}
-
-#define CRM_MASK       0xf
-u32 aarch32_insn_mcr_extract_crm(u32 insn)
-{
-       return insn & CRM_MASK;
-}
-
-static bool __kprobes __check_eq(unsigned long pstate)
-{
-       return (pstate & PSR_Z_BIT) != 0;
-}
-
-static bool __kprobes __check_ne(unsigned long pstate)
-{
-       return (pstate & PSR_Z_BIT) == 0;
-}
-
-static bool __kprobes __check_cs(unsigned long pstate)
-{
-       return (pstate & PSR_C_BIT) != 0;
-}
-
-static bool __kprobes __check_cc(unsigned long pstate)
-{
-       return (pstate & PSR_C_BIT) == 0;
-}
-
-static bool __kprobes __check_mi(unsigned long pstate)
-{
-       return (pstate & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_pl(unsigned long pstate)
-{
-       return (pstate & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_vs(unsigned long pstate)
-{
-       return (pstate & PSR_V_BIT) != 0;
-}
-
-static bool __kprobes __check_vc(unsigned long pstate)
-{
-       return (pstate & PSR_V_BIT) == 0;
-}
-
-static bool __kprobes __check_hi(unsigned long pstate)
-{
-       pstate &= ~(pstate >> 1);       /* PSR_C_BIT &= ~PSR_Z_BIT */
-       return (pstate & PSR_C_BIT) != 0;
-}
-
-static bool __kprobes __check_ls(unsigned long pstate)
-{
-       pstate &= ~(pstate >> 1);       /* PSR_C_BIT &= ~PSR_Z_BIT */
-       return (pstate & PSR_C_BIT) == 0;
-}
-
-static bool __kprobes __check_ge(unsigned long pstate)
-{
-       pstate ^= (pstate << 3);        /* PSR_N_BIT ^= PSR_V_BIT */
-       return (pstate & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_lt(unsigned long pstate)
-{
-       pstate ^= (pstate << 3);        /* PSR_N_BIT ^= PSR_V_BIT */
-       return (pstate & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_gt(unsigned long pstate)
-{
-       /*PSR_N_BIT ^= PSR_V_BIT */
-       unsigned long temp = pstate ^ (pstate << 3);
-
-       temp |= (pstate << 1);  /*PSR_N_BIT |= PSR_Z_BIT */
-       return (temp & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_le(unsigned long pstate)
-{
-       /*PSR_N_BIT ^= PSR_V_BIT */
-       unsigned long temp = pstate ^ (pstate << 3);
-
-       temp |= (pstate << 1);  /*PSR_N_BIT |= PSR_Z_BIT */
-       return (temp & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_al(unsigned long pstate)
-{
-       return true;
-}
-
-/*
- * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
- * it behaves identically to 0b1110 ("al").
- */
-pstate_check_t * const aarch32_opcode_cond_checks[16] = {
-       __check_eq, __check_ne, __check_cs, __check_cc,
-       __check_mi, __check_pl, __check_vs, __check_vc,
-       __check_hi, __check_ls, __check_ge, __check_lt,
-       __check_gt, __check_le, __check_al, __check_al
-};
-
-static bool range_of_ones(u64 val)
-{
-       /* Doesn't handle full ones or full zeroes */
-       u64 sval = val >> __ffs64(val);
-
-       /* One of Sean Eron Anderson's bithack tricks */
-       return ((sval + 1) & (sval)) == 0;
-}
-
-static u32 aarch64_encode_immediate(u64 imm,
-                                   enum aarch64_insn_variant variant,
-                                   u32 insn)
-{
-       unsigned int immr, imms, n, ones, ror, esz, tmp;
-       u64 mask;
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               esz = 32;
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               insn |= AARCH64_INSN_SF_BIT;
-               esz = 64;
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       mask = GENMASK(esz - 1, 0);
-
-       /* Can't encode full zeroes, full ones, or value wider than the mask */
-       if (!imm || imm == mask || imm & ~mask)
-               return AARCH64_BREAK_FAULT;
-
-       /*
-        * Inverse of Replicate(). Try to spot a repeating pattern
-        * with a pow2 stride.
-        */
-       for (tmp = esz / 2; tmp >= 2; tmp /= 2) {
-               u64 emask = BIT(tmp) - 1;
-
-               if ((imm & emask) != ((imm >> tmp) & emask))
-                       break;
-
-               esz = tmp;
-               mask = emask;
-       }
-
-       /* N is only set if we're encoding a 64bit value */
-       n = esz == 64;
-
-       /* Trim imm to the element size */
-       imm &= mask;
-
-       /* That's how many ones we need to encode */
-       ones = hweight64(imm);
-
-       /*
-        * imms is set to (ones - 1), prefixed with a string of ones
-        * and a zero if they fit. Cap it to 6 bits.
-        */
-       imms  = ones - 1;
-       imms |= 0xf << ffs(esz);
-       imms &= BIT(6) - 1;
-
-       /* Compute the rotation */
-       if (range_of_ones(imm)) {
-               /*
-                * Pattern: 0..01..10..0
-                *
-                * Compute how many rotate we need to align it right
-                */
-               ror = __ffs64(imm);
-       } else {
-               /*
-                * Pattern: 0..01..10..01..1
-                *
-                * Fill the unused top bits with ones, and check if
-                * the result is a valid immediate (all ones with a
-                * contiguous ranges of zeroes).
-                */
-               imm |= ~mask;
-               if (!range_of_ones(~imm))
-                       return AARCH64_BREAK_FAULT;
-
-               /*
-                * Compute the rotation to get a continuous set of
-                * ones, with the first bit set at position 0
-                */
-               ror = fls(~imm);
-       }
-
-       /*
-        * immr is the number of bits we need to rotate back to the
-        * original set of ones. Note that this is relative to the
-        * element size...
-        */
-       immr = (esz - ror) % esz;
-
-       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n);
-       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
-       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
-}
-
-u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
-                                      enum aarch64_insn_variant variant,
-                                      enum aarch64_insn_register Rn,
-                                      enum aarch64_insn_register Rd,
-                                      u64 imm)
-{
-       u32 insn;
-
-       switch (type) {
-       case AARCH64_INSN_LOGIC_AND:
-               insn = aarch64_insn_get_and_imm_value();
-               break;
-       case AARCH64_INSN_LOGIC_ORR:
-               insn = aarch64_insn_get_orr_imm_value();
-               break;
-       case AARCH64_INSN_LOGIC_EOR:
-               insn = aarch64_insn_get_eor_imm_value();
-               break;
-       case AARCH64_INSN_LOGIC_AND_SETFLAGS:
-               insn = aarch64_insn_get_ands_imm_value();
-               break;
-       default:
-               pr_err("%s: unknown logical encoding %d\n", __func__, type);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
-       return aarch64_encode_immediate(imm, variant, insn);
-}
-
-u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
-                         enum aarch64_insn_register Rm,
-                         enum aarch64_insn_register Rn,
-                         enum aarch64_insn_register Rd,
-                         u8 lsb)
-{
-       u32 insn;
-
-       insn = aarch64_insn_get_extr_value();
-
-       switch (variant) {
-       case AARCH64_INSN_VARIANT_32BIT:
-               if (lsb > 31)
-                       return AARCH64_BREAK_FAULT;
-               break;
-       case AARCH64_INSN_VARIANT_64BIT:
-               if (lsb > 63)
-                       return AARCH64_BREAK_FAULT;
-               insn |= AARCH64_INSN_SF_BIT;
-               insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, 1);
-               break;
-       default:
-               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
-               return AARCH64_BREAK_FAULT;
-       }
-
-       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, lsb);
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, Rd);
-       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn);
-       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm);
-}
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c

index 9a8a0ae..fc98037 100644 (file)
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -8,6 +8,7 @@
  #include <linux/kernel.h>
  #include <linux/jump_label.h>
  #include <asm/insn.h>
+#include <asm/patching.h>
  
  void arch_jump_label_transform(struct jump_entry *entry,
                                enum jump_label_type type)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c

index 341342b..cfa2cfd 100644 (file)
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void)
          * we end up running with module randomization disabled.
          */
         module_alloc_base = (u64)_etext - MODULES_VSIZE;
-       __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
+       dcache_clean_inval_poc((unsigned long)&module_alloc_base,
+                           (unsigned long)&module_alloc_base +
+                                   sizeof(module_alloc_base));
  
         /*
          * Try to map the FDT early. If this fails, we simply bail,
@@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void)
         module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
         module_alloc_base &= PAGE_MASK;
  
-       __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
-       __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
+       dcache_clean_inval_poc((unsigned long)&module_alloc_base,
+                           (unsigned long)&module_alloc_base +
+                                   sizeof(module_alloc_base));
+       dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
+                           (unsigned long)&memstart_offset_seed +
+                                   sizeof(memstart_offset_seed));
  
         return offset;
  }
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c

index 1a157ca..2aede78 100644 (file)
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -17,6 +17,7 @@
  
  #include <asm/debug-monitors.h>
  #include <asm/insn.h>
+#include <asm/patching.h>
  #include <asm/traps.h>
  
  struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c

index 90a335c..03ceabe 100644 (file)
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -68,10 +68,16 @@ int machine_kexec_post_load(struct kimage *kimage)
         kimage->arch.kern_reloc = __pa(reloc_code);
         kexec_image_info(kimage);
  
-       /* Flush the reloc_code in preparation for its execution. */
-       __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
-       flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
-                          arm64_relocate_new_kernel_size);
+       /*
+        * For execution with the MMU off, reloc_code needs to be cleaned to the
+        * PoC and invalidated from the I-cache.
+        */
+       dcache_clean_inval_poc((unsigned long)reloc_code,
+                           (unsigned long)reloc_code +
+                                   arm64_relocate_new_kernel_size);
+       icache_inval_pou((uintptr_t)reloc_code,
+                               (uintptr_t)reloc_code +
+                                       arm64_relocate_new_kernel_size);
  
         return 0;
  }
@@ -102,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage)
  
         for (entry = &kimage->head; ; entry++) {
                 unsigned int flag;
-               void *addr;
+               unsigned long addr;
  
                 /* flush the list entries. */
-               __flush_dcache_area(entry, sizeof(kimage_entry_t));
+               dcache_clean_inval_poc((unsigned long)entry,
+                                   (unsigned long)entry +
+                                           sizeof(kimage_entry_t));
  
                 flag = *entry & IND_FLAGS;
                 if (flag == IND_DONE)
                         break;
  
-               addr = phys_to_virt(*entry & PAGE_MASK);
+               addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);
  
                 switch (flag) {
                 case IND_INDIRECTION:
@@ -120,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage)
                         break;
                 case IND_SOURCE:
                         /* flush the source pages. */
-                       __flush_dcache_area(addr, PAGE_SIZE);
+                       dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
                         break;
                 case IND_DESTINATION:
                         break;
@@ -147,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage)
                         kimage->segment[i].memsz,
                         kimage->segment[i].memsz /  PAGE_SIZE);
  
-               __flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
-                       kimage->segment[i].memsz);
+               dcache_clean_inval_poc(
+                       (unsigned long)phys_to_virt(kimage->segment[i].mem),
+                       (unsigned long)phys_to_virt(kimage->segment[i].mem) +
+                               kimage->segment[i].memsz);
         }
  }
  
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c

new file mode 100644 (file)

index 0000000..771f543
--- /dev/null
+++ b/arch/arm64/kernel/patching.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+#include <asm/insn.h>
+#include <asm/kprobes.h>
+#include <asm/patching.h>
+#include <asm/sections.h>
+
+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+static bool is_exit_text(unsigned long addr)
+{
+       /* exit text is discarded with init text/data, so ignore it once running */
+       if (system_state >= SYSTEM_RUNNING)
+               return false;
+       return addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end;
+}
+
+static bool is_image_text(unsigned long addr)
+{
+       return core_kernel_text(addr) || is_exit_text(addr); /* core or exit text */
+}
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+       unsigned long va = (uintptr_t) addr;
+       struct page *pg;
+
+       /* Non-image text is only remapped when STRICT_MODULE_RWX is enabled */
+       if (is_image_text(va))
+               pg = phys_to_page(__pa_symbol(addr));
+       else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+               pg = vmalloc_to_page(addr);
+       else
+               return addr;
+
+       BUG_ON(!pg);
+       return (void *)set_fixmap_offset(fixmap, page_to_phys(pg) +
+                       (va & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+       clear_fixmap(fixmap);   /* tear down the fixmap slot set up by patch_map() */
+}
+/*
+ * Read one A64 instruction from @addr into @insnp. In ARMv8-A these are always
+ * 32 bits wide and little-endian. @insnp is left untouched if the read fails.
+ */
+int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
+{
+       __le32 raw;
+       int err = copy_from_kernel_nofault(&raw, addr, AARCH64_INSN_SIZE);
+
+       if (err)
+               return err;
+
+       *insnp = le32_to_cpu(raw);
+       return 0;
+}
+
+static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
+{
+       unsigned long irqflags;
+       void *alias;
+       int err;
+
+       /* patch_lock is held across the whole FIX_TEXT_POKE0 map/copy/unmap */
+       raw_spin_lock_irqsave(&patch_lock, irqflags);
+       alias = patch_map(addr, FIX_TEXT_POKE0);
+
+       err = copy_to_kernel_nofault(alias, &insn, AARCH64_INSN_SIZE);
+       patch_unmap(FIX_TEXT_POKE0);
+       raw_spin_unlock_irqrestore(&patch_lock, irqflags);
+
+       return err;
+}
+
+int __kprobes aarch64_insn_write(void *addr, u32 insn)
+{
+       return __aarch64_insn_write(addr, cpu_to_le32(insn)); /* stored as LE */
+}
+
+int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
+{
+       uintptr_t pc = (uintptr_t)addr;
+       int err;
+
+       /* A64 instructions must be word aligned: reject anything else */
+       if (pc & 0x3)
+               return -EINVAL;
+
+       err = aarch64_insn_write(addr, insn);
+       if (err)
+               return err;
+
+       caches_clean_inval_pou(pc, pc + AARCH64_INSN_SIZE);
+       return 0;
+}
+
+struct aarch64_insn_patch {
+       void            **text_addrs;   /* addresses to patch */
+       u32             *new_insns;     /* new_insns[i] is written to text_addrs[i] */
+       int             insn_cnt;       /* number of entries in both arrays */
+       atomic_t        cpu_count;      /* rendezvous counter for the patch callback */
+};
+
+static int __kprobes aarch64_insn_patch_text_cb(void *arg)
+{
+       int i, ret = 0;
+       struct aarch64_insn_patch *pp = arg;
+
+       /* The first CPU to arrive becomes master and does all the patching */
+       if (atomic_inc_return(&pp->cpu_count) == 1) {
+               for (i = 0; ret == 0 && i < pp->insn_cnt; i++) /* stop on error */
+                       ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+                                                            pp->new_insns[i]);
+               /* Release the waiters: push the count past num_online_cpus(). */
+               atomic_inc(&pp->cpu_count);
+       } else { /* wait for every CPU to check in and the master to finish */
+               while (atomic_read(&pp->cpu_count) <= num_online_cpus())
+                       cpu_relax();
+               isb();
+       }
+
+       return ret; /* non-zero only on the master, and only if a patch failed */
+}
+
+int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
+{
+       struct aarch64_insn_patch req = {
+               .cpu_count = ATOMIC_INIT(0),
+               .insn_cnt = cnt,
+               .new_insns = insns,
+               .text_addrs = addrs,
+       };
+
+       /* Refuse an empty or negative request before involving other CPUs */
+       if (cnt <= 0)
+               return -EINVAL;
+       return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &req,
+                                      cpu_online_mask);
+}
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c

index 88ff471..4a72c27 100644 (file)
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -116,7 +116,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
                 tail = (struct frame_tail __user *)regs->regs[29];
  
                 while (entry->nr < entry->max_stack &&
-                      tail && !((unsigned long)tail & 0xf))
+                      tail && !((unsigned long)tail & 0x7))
                         tail = user_backtrace(tail, entry);
         } else {
  #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c

index d607c99..609edde 100644 (file)
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -7,26 +7,28 @@
   * Copyright (C) 2013 Linaro Limited.
   * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
   */
+#include <linux/extable.h>
  #include <linux/kasan.h>
  #include <linux/kernel.h>
  #include <linux/kprobes.h>
-#include <linux/extable.h>
-#include <linux/slab.h>
-#include <linux/stop_machine.h>
  #include <linux/sched/debug.h>
  #include <linux/set_memory.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
  #include <linux/stringify.h>
+#include <linux/uaccess.h>
  #include <linux/vmalloc.h>
-#include <asm/traps.h>
-#include <asm/ptrace.h>
+
  #include <asm/cacheflush.h>
-#include <asm/debug-monitors.h>
  #include <asm/daifflags.h>
-#include <asm/system_misc.h>
+#include <asm/debug-monitors.h>
  #include <asm/insn.h>
-#include <linux/uaccess.h>
  #include <asm/irq.h>
+#include <asm/patching.h>
+#include <asm/ptrace.h>
  #include <asm/sections.h>
+#include <asm/system_misc.h>
+#include <asm/traps.h>
  
  #include "decode-insn.h"
  
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c

index 25f67ec..22d0b32 100644 (file)
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -10,6 +10,7 @@
  #include <linux/kprobes.h>
  
  #include <asm/ptrace.h>
+#include <asm/traps.h>
  
  #include "simulate-insn.h"
  
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c

index 2c24763..9be668f 100644 (file)
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
         memcpy(dst, src, len);
  
         /* flush caches (dcache/icache) */
-       sync_icache_aliases(dst, len);
+       sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
  
         kunmap_atomic(xol_page_kaddr);
  }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c

index b4bb67f..6b8b4b3 100644 (file)
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -18,7 +18,6 @@
  #include <linux/sched/task.h>
  #include <linux/sched/task_stack.h>
  #include <linux/kernel.h>
-#include <linux/lockdep.h>
  #include <linux/mman.h>
  #include <linux/mm.h>
  #include <linux/nospec.h>
@@ -46,7 +45,6 @@
  #include <linux/prctl.h>
  
  #include <asm/alternative.h>
-#include <asm/arch_gicv3.h>
  #include <asm/compat.h>
  #include <asm/cpufeature.h>
  #include <asm/cacheflush.h>
@@ -74,63 +72,6 @@ EXPORT_SYMBOL_GPL(pm_power_off);
  
  void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
  
-static void noinstr __cpu_do_idle(void)
-{
-       dsb(sy);
-       wfi();
-}
-
-static void noinstr __cpu_do_idle_irqprio(void)
-{
-       unsigned long pmr;
-       unsigned long daif_bits;
-
-       daif_bits = read_sysreg(daif);
-       write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif);
-
-       /*
-        * Unmask PMR before going idle to make sure interrupts can
-        * be raised.
-        */
-       pmr = gic_read_pmr();
-       gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
-       __cpu_do_idle();
-
-       gic_write_pmr(pmr);
-       write_sysreg(daif_bits, daif);
-}
-
-/*
- *     cpu_do_idle()
- *
- *     Idle the processor (wait for interrupt).
- *
- *     If the CPU supports priority masking we must do additional work to
- *     ensure that interrupts are not masked at the PMR (because the core will
- *     not wake up if we block the wake up signal in the interrupt controller).
- */
-void noinstr cpu_do_idle(void)
-{
-       if (system_uses_irq_prio_masking())
-               __cpu_do_idle_irqprio();
-       else
-               __cpu_do_idle();
-}
-
-/*
- * This is our default idle handler.
- */
-void noinstr arch_cpu_idle(void)
-{
-       /*
-        * This should do all the clock switching and wait for interrupt
-        * tricks
-        */
-       cpu_do_idle();
-       raw_local_irq_enable();
-}
-
  #ifdef CONFIG_HOTPLUG_CPU
  void arch_cpu_idle_dead(void)
  {
@@ -435,6 +376,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
         }
         p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
         p->thread.cpu_context.sp = (unsigned long)childregs;
+       /*
+        * For the benefit of the unwinder, set up childregs->stackframe
+        * as the final frame for the new task.
+        */
+       p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
  
         ptrace_hw_copy_thread(p);
  
@@ -527,6 +473,15 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
         write_sysreg(val, cntkctl_el1);
  }
  
+static void compat_thread_switch(struct task_struct *next)
+{
+       bool compat = is_compat_thread(task_thread_info(next));
+
+       /* Flag compat tasks only while arm64_mismatched_32bit_el0 is enabled */
+       if (compat && static_branch_unlikely(&arm64_mismatched_32bit_el0))
+               set_tsk_thread_flag(next, TIF_NOTIFY_RESUME);
+}
+
  static void update_sctlr_el1(u64 sctlr)
  {
         /*
@@ -568,6 +523,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
         ssbs_thread_switch(next);
         erratum_1418040_thread_switch(prev, next);
         ptrauth_thread_switch_user(next);
+       compat_thread_switch(next);
  
         /*
          * Complete any pending TLB or cache maintenance on this CPU in case
@@ -633,8 +589,15 @@ unsigned long arch_align_stack(unsigned long sp)
   */
  void arch_setup_new_exec(void)
  {
-       current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
+       unsigned long mmflags = 0;
+
+       if (is_compat_task()) {
+               mmflags = MMCF_AARCH32;
+               if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+                       set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+       }
  
+       current->mm->context.flags = mmflags;
         ptrauth_thread_init_user();
         mte_thread_init_user();
  
@@ -724,22 +687,6 @@ static int __init tagged_addr_init(void)
  core_initcall(tagged_addr_init);
  #endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
  
-asmlinkage void __sched arm64_preempt_schedule_irq(void)
-{
-       lockdep_assert_irqs_disabled();
-
-       /*
-        * Preempting a task from an IRQ means we leave copies of PSTATE
-        * on the stack. cpufeature's enable calls may modify PSTATE, but
-        * resuming one of these preempted tasks would undo those changes.
-        *
-        * Only allow a task to be preempted once cpufeatures have been
-        * enabled.
-        */
-       if (system_capabilities_finalized())
-               preempt_schedule_irq();
-}
-
  #ifdef CONFIG_BINFMT_ELF
  int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
                          bool has_interp, bool is_interp)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c

index eb2f739..499b6b2 100644 (file)
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -122,7 +122,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
  {
         return ((addr & ~(THREAD_SIZE - 1))  ==
                 (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
-               on_irq_stack(addr, NULL);
+               on_irq_stack(addr, sizeof(unsigned long), NULL);
  }
  
  /**
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c

index 2c7ca44..47f77d1 100644 (file)
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -162,31 +162,33 @@ static int init_sdei_scs(void)
         return err;
  }
  
-static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
+static bool on_sdei_normal_stack(unsigned long sp, unsigned long size,
+                                struct stack_info *info)
  {
         unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
         unsigned long high = low + SDEI_STACK_SIZE;
  
-       return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info);
+       return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info);
  }
  
-static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
+static bool on_sdei_critical_stack(unsigned long sp, unsigned long size,
+                                  struct stack_info *info)
  {
         unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
         unsigned long high = low + SDEI_STACK_SIZE;
  
-       return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info);
+       return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info);
  }
  
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
+bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info)
  {
         if (!IS_ENABLED(CONFIG_VMAP_STACK))
                 return false;
  
-       if (on_sdei_critical_stack(sp, info))
+       if (on_sdei_critical_stack(sp, size, info))
                 return true;
  
-       if (on_sdei_normal_stack(sp, info))
+       if (on_sdei_normal_stack(sp, size, info))
                 return true;
  
         return false;
@@ -231,13 +233,13 @@ out_err:
  }
  
  /*
- * __sdei_handler() returns one of:
+ * do_sdei_event() returns one of:
   *  SDEI_EV_HANDLED -  success, return to the interrupted context.
   *  SDEI_EV_FAILED  -  failure, return this error code to firmare.
   *  virtual-address -  success, return to this address.
   */
-static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
-                                            struct sdei_registered_event *arg)
+unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
+                                     struct sdei_registered_event *arg)
  {
         u32 mode;
         int i, err = 0;
@@ -292,45 +294,3 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
  
         return vbar + 0x480;
  }
-
-static void __kprobes notrace __sdei_pstate_entry(void)
-{
-       /*
-        * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
-        * whether PSTATE bits are inherited unchanged or generated from
-        * scratch, and the TF-A implementation always clears PAN and always
-        * clears UAO. There are no other known implementations.
-        *
-        * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
-        * PSTATE is modified upon architectural exceptions, and so PAN is
-        * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
-        * cleared.
-        *
-        * We must explicitly reset PAN to the expected state, including
-        * clearing it when the host isn't using it, in case a VM had it set.
-        */
-       if (system_uses_hw_pan())
-               set_pstate_pan(1);
-       else if (cpu_has_pan())
-               set_pstate_pan(0);
-}
-
-asmlinkage noinstr unsigned long
-__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
-{
-       unsigned long ret;
-
-       /*
-        * We didn't take an exception to get here, so the HW hasn't
-        * set/cleared bits in PSTATE that we may rely on. Initialize PAN.
-        */
-       __sdei_pstate_entry();
-
-       arm64_enter_nmi(regs);
-
-       ret = _sdei_handler(regs, arg);
-
-       arm64_exit_nmi(regs);
-
-       return ret;
-}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c

index 68b30e8..8ed6614 100644 (file)
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -87,12 +87,6 @@ void __init smp_setup_processor_id(void)
         u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
         set_cpu_logical_map(0, mpidr);
  
-       /*
-        * clear __my_cpu_offset on boot CPU to avoid hang caused by
-        * using percpu variable early, for example, lockdep will
-        * access percpu variable inside lock_release
-        */
-       set_my_cpu_offset(0);
         pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
                 (unsigned long)mpidr, read_cpuid_id());
  }
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c

index 6237486..f8192f4 100644 (file)
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -911,6 +911,19 @@ static void do_signal(struct pt_regs *regs)
         restore_saved_sigmask();
  }
  
+static bool cpu_affinity_invalid(struct pt_regs *regs)
+{
+       /* Only compat user mode is checked against the 32-bit EL0 cpumask */
+       if (compat_user_mode(regs)) {
+               /* Preemptible here, but a reschedule rechecks the affinity. */
+               int cpu = raw_smp_processor_id();
+
+               return !cpumask_test_cpu(cpu, system_32bit_el0_cpumask());
+       }
+
+       return false;
+}
+
  asmlinkage void do_notify_resume(struct pt_regs *regs,
                                  unsigned long thread_flags)
  {
@@ -938,6 +951,19 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
                         if (thread_flags & _TIF_NOTIFY_RESUME) {
                                 tracehook_notify_resume(regs);
                                 rseq_handle_notify_resume(NULL, regs);
+
+                               /*
+                                * If we reschedule after checking the affinity
+                                * then we must ensure that TIF_NOTIFY_RESUME
+                                * is set so that we check the affinity again.
+                                * Since tracehook_notify_resume() clears the
+                                * flag, ensure that the compiler doesn't move
+                                * it after the affinity check.
+                                */
+                               barrier();
+
+                               if (cpu_affinity_invalid(regs))
+                                       force_sig(SIGKILL);
                         }
  
                         if (thread_flags & _TIF_FOREIGN_FPSTATE)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c

index dcd7041..161dab4 100644 (file)
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -120,9 +120,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
          * page tables.
          */
         secondary_data.task = idle;
-       secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
         update_cpu_boot_status(CPU_MMU_OFF);
-       __flush_dcache_area(&secondary_data, sizeof(secondary_data));
  
         /* Now bring the CPU into our world */
         ret = boot_secondary(cpu, idle);
@@ -142,8 +140,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
  
         pr_crit("CPU%u: failed to come online\n", cpu);
         secondary_data.task = NULL;
-       secondary_data.stack = NULL;
-       __flush_dcache_area(&secondary_data, sizeof(secondary_data));
         status = READ_ONCE(secondary_data.status);
         if (status == CPU_MMU_OFF)
                 status = READ_ONCE(__early_cpu_boot_status);
@@ -202,10 +198,7 @@ asmlinkage notrace void secondary_start_kernel(void)
         u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
         struct mm_struct *mm = &init_mm;
         const struct cpu_operations *ops;
-       unsigned int cpu;
-
-       cpu = task_cpu(current);
-       set_my_cpu_offset(per_cpu_offset(cpu));
+       unsigned int cpu = smp_processor_id();
  
         /*
          * All kernel threads share the same mm context; grab a
@@ -352,7 +345,7 @@ void __cpu_die(unsigned int cpu)
                 pr_crit("CPU%u: cpu didn't die\n", cpu);
                 return;
         }
-       pr_notice("CPU%u: shutdown\n", cpu);
+       pr_debug("CPU%u: shutdown\n", cpu);
  
         /*
          * Now that the dying CPU is beyond the point of no return w.r.t.
@@ -452,6 +445,11 @@ void __init smp_cpus_done(unsigned int max_cpus)
  
  void __init smp_prepare_boot_cpu(void)
  {
+       /*
+        * The runtime per-cpu areas have been allocated by
+        * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be
+        * freed shortly, so we must move over to the runtime per-cpu area.
+        */
         set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
         cpuinfo_store_boot_cpu();
  
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c

index c45a835..7e1624e 100644 (file)
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -36,7 +36,7 @@ static void write_pen_release(u64 val)
         unsigned long size = sizeof(secondary_holding_pen_release);
  
         secondary_holding_pen_release = val;
-       __flush_dcache_area(start, size);
+       dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size);
  }
  
  
@@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
          * the boot protocol.
          */
         writeq_relaxed(pa_holding_pen, release_addr);
-       __flush_dcache_area((__force void *)release_addr,
-                           sizeof(*release_addr));
+       dcache_clean_inval_poc((__force unsigned long)release_addr,
+                           (__force unsigned long)release_addr +
+                                   sizeof(*release_addr));
  
         /*
          * Send an event to wake up the secondary CPU.
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c

index de07147..b189de5 100644 (file)
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -68,13 +68,17 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
         unsigned long fp = frame->fp;
         struct stack_info info;
  
-       if (fp & 0xf)
-               return -EINVAL;
-
         if (!tsk)
                 tsk = current;
  
-       if (!on_accessible_stack(tsk, fp, &info))
+       /* Final frame; nothing to unwind */
+       if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
+               return -ENOENT;
+
+       if (fp & 0x7)
+               return -EINVAL;
+
+       if (!on_accessible_stack(tsk, fp, 16, &info))
                 return -EINVAL;
  
         if (test_bit(info.type, frame->stacks_done))
@@ -128,12 +132,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
  
         frame->pc = ptrauth_strip_insn_pac(frame->pc);
  
-       /*
-        * This is a terminal record, so we have finished unwinding.
-        */
-       if (!frame->fp && !frame->pc)
-               return -ENOENT;
-
         return 0;
  }
  NOKPROBE_SYMBOL(unwind_frame);
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c

index e3f72df..938ce6f 100644 (file)
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -7,6 +7,7 @@
  #include <asm/alternative.h>
  #include <asm/cacheflush.h>
  #include <asm/cpufeature.h>
+#include <asm/cpuidle.h>
  #include <asm/daifflags.h>
  #include <asm/debug-monitors.h>
  #include <asm/exec.h>
@@ -91,6 +92,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
         int ret = 0;
         unsigned long flags;
         struct sleep_stack_data state;
+       struct arm_cpuidle_irq_context context;
  
         /* Report any MTE async fault before going to suspend */
         mte_suspend_enter();
@@ -103,12 +105,18 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
         flags = local_daif_save();
  
         /*
-        * Function graph tracer state gets incosistent when the kernel
+        * Function graph tracer state gets inconsistent when the kernel
          * calls functions that never return (aka suspend finishers) hence
          * disable graph tracing during their execution.
          */
         pause_graph_tracing();
  
+       /*
+        * Switch to using DAIF.IF instead of PMR in order to reliably
+        * resume if we're using pseudo-NMIs.
+        */
+       arm_cpuidle_save_irq_context(&context);
+
         if (__cpu_suspend_enter(&state)) {
                 /* Call the suspend finisher */
                 ret = fn(arg);
@@ -126,6 +134,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
                 RCU_NONIDLE(__cpu_suspend_exit());
         }
  
+       arm_cpuidle_restore_irq_context(&context);
+
         unpause_graph_tracing();
  
         /*
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c

index 265fe3e..db5159a 100644 (file)
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
                         dsb(ish);
                 }
  
-               ret = __flush_cache_user_range(start, start + chunk);
+               ret = caches_clean_inval_user_pou(start, start + chunk);
                 if (ret)
                         return ret;
  
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c

index a05d34f..b03e383 100644 (file)
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
  #include <asm/extable.h>
  #include <asm/insn.h>
  #include <asm/kprobes.h>
+#include <asm/patching.h>
  #include <asm/traps.h>
  #include <asm/smp.h>
  #include <asm/stack_pointer.h>
@@ -45,11 +46,102 @@
  #include <asm/system_misc.h>
  #include <asm/sysreg.h>
  
-static const char *handler[] = {
-       "Synchronous Abort",
-       "IRQ",
-       "FIQ",
-       "Error"
+/* EQ: holds when PSR_Z_BIT is set in @pstate. */
+static bool __kprobes __check_eq(unsigned long pstate)
+{
+       return !!(pstate & PSR_Z_BIT);
+}
+
+/* NE: holds when PSR_Z_BIT is clear in @pstate. */
+static bool __kprobes __check_ne(unsigned long pstate)
+{
+       return !(pstate & PSR_Z_BIT);
+}
+
+/* CS: holds when PSR_C_BIT is set in @pstate. */
+static bool __kprobes __check_cs(unsigned long pstate)
+{
+       return !!(pstate & PSR_C_BIT);
+}
+
+/* CC: holds when PSR_C_BIT is clear in @pstate. */
+static bool __kprobes __check_cc(unsigned long pstate)
+{
+       return !(pstate & PSR_C_BIT);
+}
+
+/* MI: holds when PSR_N_BIT is set in @pstate. */
+static bool __kprobes __check_mi(unsigned long pstate)
+{
+       return !!(pstate & PSR_N_BIT);
+}
+
+/* PL: holds when PSR_N_BIT is clear in @pstate. */
+static bool __kprobes __check_pl(unsigned long pstate)
+{
+       return !(pstate & PSR_N_BIT);
+}
+
+/* VS: holds when PSR_V_BIT is set in @pstate. */
+static bool __kprobes __check_vs(unsigned long pstate)
+{
+       return !!(pstate & PSR_V_BIT);
+}
+
+/* VC: holds when PSR_V_BIT is clear in @pstate. */
+static bool __kprobes __check_vc(unsigned long pstate)
+{
+       return !(pstate & PSR_V_BIT);
+}
+
+/* HI: holds when PSR_C_BIT is set and PSR_Z_BIT is clear. */
+static bool __kprobes __check_hi(unsigned long pstate)
+{
+       /* The one-bit right shift lines PSR_Z_BIT up with PSR_C_BIT. */
+       unsigned long c_and_not_z = pstate & ~(pstate >> 1);
+
+       return !!(c_and_not_z & PSR_C_BIT);
+}
+
+/* LS: holds when PSR_C_BIT is clear or PSR_Z_BIT is set. */
+static bool __kprobes __check_ls(unsigned long pstate)
+{
+       /* The one-bit right shift lines PSR_Z_BIT up with PSR_C_BIT. */
+       unsigned long c_and_not_z = pstate & ~(pstate >> 1);
+
+       return !(c_and_not_z & PSR_C_BIT);
+}
+
+/* GE: holds when PSR_N_BIT and PSR_V_BIT have the same value. */
+static bool __kprobes __check_ge(unsigned long pstate)
+{
+       /* The three-bit left shift lines PSR_V_BIT up with PSR_N_BIT. */
+       unsigned long n_xor_v = pstate ^ (pstate << 3);
+
+       return !(n_xor_v & PSR_N_BIT);
+}
+
+/* LT: holds when PSR_N_BIT and PSR_V_BIT differ. */
+static bool __kprobes __check_lt(unsigned long pstate)
+{
+       /* The three-bit left shift lines PSR_V_BIT up with PSR_N_BIT. */
+       unsigned long n_xor_v = pstate ^ (pstate << 3);
+
+       return !!(n_xor_v & PSR_N_BIT);
+}
+
+/* GT: holds when PSR_Z_BIT is clear and PSR_N_BIT equals PSR_V_BIT. */
+static bool __kprobes __check_gt(unsigned long pstate)
+{
+       /* PSR_N_BIT position: N ^ V */
+       unsigned long n_xor_v = pstate ^ (pstate << 3);
+       /* PSR_N_BIT position: Z */
+       unsigned long z_at_n = pstate << 1;
+
+       return !((n_xor_v | z_at_n) & PSR_N_BIT);
+}
+
+/* LE: holds when PSR_Z_BIT is set or PSR_N_BIT differs from PSR_V_BIT. */
+static bool __kprobes __check_le(unsigned long pstate)
+{
+       /* PSR_N_BIT position: N ^ V */
+       unsigned long n_xor_v = pstate ^ (pstate << 3);
+       /* PSR_N_BIT position: Z */
+       unsigned long z_at_n = pstate << 1;
+
+       return !!((n_xor_v | z_at_n) & PSR_N_BIT);
+}
+
+/* AL: always holds; @pstate is not examined. */
+static bool __kprobes __check_al(unsigned long pstate)
+{
+       return true;
+}
+
+/*
+ * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
+ * it behaves identically to 0b1110 ("al").
+ */
+pstate_check_t * const aarch32_opcode_cond_checks[16] = {
+       __check_eq, __check_ne, __check_cs, __check_cc,
+       __check_mi, __check_pl, __check_vs, __check_vc,
+       __check_hi, __check_ls, __check_ge, __check_lt,
+       __check_gt, __check_le, __check_al, __check_al
  };
  
  int show_unhandled_signals = 0;
@@ -750,28 +842,9 @@ const char *esr_get_class_string(u32 esr)
         return esr_class_str[ESR_ELx_EC(esr)];
  }
  
-/*
- * bad_mode handles the impossible case in the exception vector. This is always
- * fatal.
- */
-asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
-{
-       arm64_enter_nmi(regs);
-
-       console_verbose();
-
-       pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
-               handler[reason], smp_processor_id(), esr,
-               esr_get_class_string(esr));
-
-       __show_regs(regs);
-       local_daif_mask();
-       panic("bad mode");
-}
-
  /*
   * bad_el0_sync handles unexpected, but potentially recoverable synchronous
- * exceptions taken from EL0. Unlike bad_mode, this returns.
+ * exceptions taken from EL0.
   */
  void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
  {
@@ -789,15 +862,11 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
  DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
         __aligned(16);
  
-asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
+void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
  {
         unsigned long tsk_stk = (unsigned long)current->stack;
         unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
         unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
-       unsigned int esr = read_sysreg(esr_el1);
-       unsigned long far = read_sysreg(far_el1);
-
-       arm64_enter_nmi(regs);
  
         console_verbose();
         pr_emerg("Insufficient stack space to handle exception!");
@@ -870,15 +939,11 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
         }
  }
  
-asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr)
+void do_serror(struct pt_regs *regs, unsigned int esr)
  {
-       arm64_enter_nmi(regs);
-
         /* non-RAS errors are not containable */
         if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
                 arm64_serror_panic(regs, esr);
-
-       arm64_exit_nmi(regs);
  }
  
  /* GENERIC_BUG traps */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c

index 1cb39c0..dc2bc55 100644 (file)
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -692,6 +692,15 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
         }
  }
  
+/*
+ * Report whether @vcpu is in a 32-bit mode that must be refused: either
+ * system_supports_32bit_el0() is false, or the arm64_mismatched_32bit_el0
+ * static key is enabled.
+ */
+static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
+{
+       /* Expected case: the vCPU is not in a 32-bit mode at all. */
+       if (likely(!vcpu_mode_is_32bit(vcpu)))
+               return false;
+
+       if (!system_supports_32bit_el0())
+               return true;
+
+       return static_branch_unlikely(&arm64_mismatched_32bit_el0);
+}
+
  /**
   * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
   * @vcpu:      The VCPU pointer
@@ -875,7 +884,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                  * with the asymmetric AArch32 case), return to userspace with
                  * a fatal error.
                  */
-               if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) {
+               if (vcpu_mode_is_bad_32bit(vcpu)) {
                         /*
                          * As we have caught the guest red-handed, decide that
                          * it isn't fit for purpose anymore by making the vcpu
@@ -1064,7 +1073,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
                 if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
                         stage2_unmap_vm(vcpu->kvm);
                 else
-                       __flush_icache_all();
+                       icache_inval_all_pou();
         }
  
         vcpu_reset_hcr(vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S

index 36cef69..958734f 100644 (file)
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -7,7 +7,7 @@
  #include <asm/assembler.h>
  #include <asm/alternative.h>
  
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
         dcache_by_line_op civac, sy, x0, x1, x2, x3
         ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c

index 7488f53..8143ebd 100644 (file)
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -134,7 +134,8 @@ static void update_nvhe_init_params(void)
         for (i = 0; i < hyp_nr_cpus; i++) {
                 params = per_cpu_ptr(&kvm_init_params, i);
                 params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd);
-               __flush_dcache_area(params, sizeof(*params));
+               dcache_clean_inval_poc((unsigned long)params,
+                                   (unsigned long)params + sizeof(*params));
         }
  }
  
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c

index 83dc3b2..38ed0f6 100644 (file)
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
          * you should be running with VHE enabled.
          */
         if (icache_is_vpipt())
-               __flush_icache_all();
+               icache_inval_all_pou();
  
         __tlb_switch_to_host(&cxt);
  }
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c

index c37c1dc..e9ad7fb 100644 (file)
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -839,8 +839,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
         stage2_put_pte(ptep, mmu, addr, level, mm_ops);
  
         if (need_flush) {
-               __flush_dcache_area(kvm_pte_follow(pte, mm_ops),
-                                   kvm_granule_size(level));
+               kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
+
+               dcache_clean_inval_poc((unsigned long)pte_follow,
+                                   (unsigned long)pte_follow +
+                                           kvm_granule_size(level));
         }
  
         if (childp)
@@ -988,11 +991,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
         struct kvm_pgtable *pgt = arg;
         struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
         kvm_pte_t pte = *ptep;
+       kvm_pte_t *pte_follow;
  
         if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
                 return 0;
  
-       __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
+       pte_follow = kvm_pte_follow(pte, mm_ops);
+       dcache_clean_inval_poc((unsigned long)pte_follow,
+                           (unsigned long)pte_follow +
+                                   kvm_granule_size(level));
         return 0;
  }
  
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile

index d31e116..6dd56a4 100644 (file)
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,7 +1,7 @@
  # SPDX-License-Identifier: GPL-2.0
  lib-y          := clear_user.o delay.o copy_from_user.o                \
                    copy_to_user.o copy_in_user.o copy_page.o            \
-                  clear_page.o csum.o memchr.o memcpy.o memmove.o      \
+                  clear_page.o csum.o insn.o memchr.o memcpy.o         \
                    memset.o memcmp.o strcmp.o strncmp.o strlen.o        \
                    strnlen.o strchr.o strrchr.o tishift.o
  
@@ -18,3 +18,5 @@ obj-$(CONFIG_CRC32) += crc32.o
  obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
  
  obj-$(CONFIG_ARM64_MTE) += mte.o
+
+obj-$(CONFIG_KASAN_SW_TAGS) += kasan_sw_tags.o
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S

index af9afcb..a7efb2a 100644 (file)
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -1,12 +1,9 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /*
- * Based on arch/arm/lib/clear_user.S
- *
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2021 Arm Ltd.
   */
-#include <linux/linkage.h>
  
-#include <asm/asm-uaccess.h>
+#include <linux/linkage.h>
  #include <asm/assembler.h>
  
         .text
@@ -19,25 +16,33 @@
   *
   * Alignment fixed up by hardware.
   */
+
+       .p2align 4
+       // Alignment is for the loop, but since the prologue (including BTI)
+       // is also 16 bytes we can keep any padding outside the function
  SYM_FUNC_START(__arch_clear_user)
-       mov     x2, x1                  // save the size for fixup return
+       add     x2, x0, x1
         subs    x1, x1, #8
         b.mi    2f
  1:
-user_ldst 9f, sttr, xzr, x0, 8
+USER(9f, sttr  xzr, [x0])
+       add     x0, x0, #8
         subs    x1, x1, #8
-       b.pl    1b
-2:     adds    x1, x1, #4
-       b.mi    3f
-user_ldst 9f, sttr, wzr, x0, 4
-       sub     x1, x1, #4
-3:     adds    x1, x1, #2
-       b.mi    4f
-user_ldst 9f, sttrh, wzr, x0, 2
-       sub     x1, x1, #2
-4:     adds    x1, x1, #1
-       b.mi    5f
-user_ldst 9f, sttrb, wzr, x0, 0
+       b.hi    1b
+USER(9f, sttr  xzr, [x2, #-8])
+       mov     x0, #0
+       ret
+
+2:     tbz     x1, #2, 3f
+USER(9f, sttr  wzr, [x0])
+USER(8f, sttr  wzr, [x2, #-4])
+       mov     x0, #0
+       ret
+
+3:     tbz     x1, #1, 4f
+USER(9f, sttrh wzr, [x0])
+4:     tbz     x1, #0, 5f
+USER(7f, sttrb wzr, [x2, #-1])
  5:     mov     x0, #0
         ret
  SYM_FUNC_END(__arch_clear_user)
@@ -45,6 +50,8 @@ EXPORT_SYMBOL(__arch_clear_user)
  
         .section .fixup,"ax"
         .align  2
-9:     mov     x0, x2                  // return the original size
+7:     sub     x0, x2, #5      // Adjust for faulting on the final byte...
+8:     add     x0, x0, #4      // ...or the second word of the 4-7 byte case
+9:     sub     x0, x2, x0
         ret
         .previous
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c

new file mode 100644 (file)

index 0000000..b506a4b
--- /dev/null
+++ b/arch/arm64/lib/insn.c
@@ -0,0 +1,1458 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <liuj97@gmail.com>
+ *
+ * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
+ */
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/printk.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include <asm/debug-monitors.h>
+#include <asm/errno.h>
+#include <asm/insn.h>
+#include <asm/kprobes.h>
+
+#define AARCH64_INSN_SF_BIT    BIT(31)
+#define AARCH64_INSN_N_BIT     BIT(22)
+#define AARCH64_INSN_LSL_12    BIT(22)
+
+/*
+ * Encoding class for each value of the 4-bit field that
+ * aarch64_get_insn_class() extracts with (insn >> 25) & 0xf.
+ */
+static const int aarch64_insn_encoding_class[] = {
+       [0x0] = AARCH64_INSN_CLS_UNKNOWN,
+       [0x1] = AARCH64_INSN_CLS_UNKNOWN,
+       [0x2] = AARCH64_INSN_CLS_SVE,
+       [0x3] = AARCH64_INSN_CLS_UNKNOWN,
+       [0x4] = AARCH64_INSN_CLS_LDST,
+       [0x5] = AARCH64_INSN_CLS_DP_REG,
+       [0x6] = AARCH64_INSN_CLS_LDST,
+       [0x7] = AARCH64_INSN_CLS_DP_FPSIMD,
+       [0x8] = AARCH64_INSN_CLS_DP_IMM,
+       [0x9] = AARCH64_INSN_CLS_DP_IMM,
+       [0xa] = AARCH64_INSN_CLS_BR_SYS,
+       [0xb] = AARCH64_INSN_CLS_BR_SYS,
+       [0xc] = AARCH64_INSN_CLS_LDST,
+       [0xd] = AARCH64_INSN_CLS_DP_REG,
+       [0xe] = AARCH64_INSN_CLS_LDST,
+       [0xf] = AARCH64_INSN_CLS_DP_FPSIMD,
+};
+
+/* Look up the encoding class of @insn from bits [28:25]. */
+enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn)
+{
+       u32 group = (insn >> 25) & 0xf;
+
+       return aarch64_insn_encoding_class[group];
+}
+
+/*
+ * Report whether @insn is a hint instruction from the fixed list below.
+ * Anything else, including other hints, yields false.
+ */
+bool __kprobes aarch64_insn_is_steppable_hint(u32 insn)
+{
+       bool steppable = false;
+
+       if (aarch64_insn_is_hint(insn)) {
+               /* Bits [11:5] are matched against the HINT_* values. */
+               switch (insn & 0xFE0) {
+               case AARCH64_INSN_HINT_XPACLRI:
+               case AARCH64_INSN_HINT_PACIA_1716:
+               case AARCH64_INSN_HINT_PACIB_1716:
+               case AARCH64_INSN_HINT_PACIAZ:
+               case AARCH64_INSN_HINT_PACIASP:
+               case AARCH64_INSN_HINT_PACIBZ:
+               case AARCH64_INSN_HINT_PACIBSP:
+               case AARCH64_INSN_HINT_BTI:
+               case AARCH64_INSN_HINT_BTIC:
+               case AARCH64_INSN_HINT_BTIJ:
+               case AARCH64_INSN_HINT_BTIJC:
+               case AARCH64_INSN_HINT_NOP:
+                       steppable = true;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       return steppable;
+}
+
+/* True for b, bl, tbz, tbnz, cbz, cbnz and b.cond. */
+bool aarch64_insn_is_branch_imm(u32 insn)
+{
+       if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn))
+               return true;
+
+       if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn))
+               return true;
+
+       if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn))
+               return true;
+
+       return aarch64_insn_is_bcond(insn);
+}
+
+/* True for ldr/ldrsw (literal), adr/adrp and prfm (literal). */
+bool __kprobes aarch64_insn_uses_literal(u32 insn)
+{
+       if (aarch64_insn_is_ldr_lit(insn))
+               return true;
+
+       if (aarch64_insn_is_ldrsw_lit(insn))
+               return true;
+
+       if (aarch64_insn_is_adr_adrp(insn))
+               return true;
+
+       return aarch64_insn_is_prfm_lit(insn);
+}
+
+/* True for b, bl, cb*, tb*, ret*, br*, blr* and b.cond. */
+bool __kprobes aarch64_insn_is_branch(u32 insn)
+{
+       /* Immediate branches and compare/test-and-branch forms. */
+       if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn) ||
+           aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
+           aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn))
+               return true;
+
+       /* Register branches, plain and authenticated. */
+       if (aarch64_insn_is_ret(insn) || aarch64_insn_is_ret_auth(insn) ||
+           aarch64_insn_is_br(insn) || aarch64_insn_is_br_auth(insn) ||
+           aarch64_insn_is_blr(insn) || aarch64_insn_is_blr_auth(insn))
+               return true;
+
+       return aarch64_insn_is_bcond(insn);
+}
+
+/*
+ * Look up where the immediate field of kind @type sits in an instruction.
+ *
+ * On success stores the unshifted field mask in *@maskp and the field's
+ * least significant bit position in *@shiftp, and returns 0. Returns
+ * -EINVAL for an unhandled @type, leaving both outputs untouched.
+ */
+static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
+                                               u32 *maskp, int *shiftp)
+{
+       int width;
+       int lsb;
+
+       switch (type) {
+       case AARCH64_INSN_IMM_26:
+               width = 26;
+               lsb = 0;
+               break;
+       case AARCH64_INSN_IMM_19:
+               width = 19;
+               lsb = 5;
+               break;
+       case AARCH64_INSN_IMM_16:
+               width = 16;
+               lsb = 5;
+               break;
+       case AARCH64_INSN_IMM_14:
+               width = 14;
+               lsb = 5;
+               break;
+       case AARCH64_INSN_IMM_12:
+               width = 12;
+               lsb = 10;
+               break;
+       case AARCH64_INSN_IMM_9:
+               width = 9;
+               lsb = 12;
+               break;
+       case AARCH64_INSN_IMM_7:
+               width = 7;
+               lsb = 15;
+               break;
+       case AARCH64_INSN_IMM_6:
+       case AARCH64_INSN_IMM_S:
+               width = 6;
+               lsb = 10;
+               break;
+       case AARCH64_INSN_IMM_R:
+               width = 6;
+               lsb = 16;
+               break;
+       case AARCH64_INSN_IMM_N:
+               width = 1;
+               lsb = 22;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       *maskp = BIT(width) - 1;
+       *shiftp = lsb;
+
+       return 0;
+}
+
+/*
+ * Layout of the AARCH64_INSN_IMM_ADR immediate: a value below ADR_IMM_SIZE
+ * whose low ADR_IMM_HILOSPLIT bits are stored at bit ADR_IMM_LOSHIFT of the
+ * instruction and whose remaining high bits are stored at bit
+ * ADR_IMM_HISHIFT.
+ */
+#define ADR_IMM_HILOSPLIT      2
+#define ADR_IMM_SIZE           SZ_2M
+#define ADR_IMM_LOMASK         ((1 << ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_HIMASK         ((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_LOSHIFT                29
+#define ADR_IMM_HISHIFT                5
+
+/*
+ * Extract the immediate field of kind @type from @insn.
+ *
+ * The field is returned as a raw, zero-extended bit pattern; no sign
+ * extension or scaling is applied here. For AARCH64_INSN_IMM_ADR the split
+ * immlo/immhi parts are first reassembled into one contiguous value.
+ *
+ * Returns 0 (after logging an error) if @type is not a known encoding.
+ */
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn)
+{
+       u32 immlo, immhi, mask;
+       int shift;
+
+       switch (type) {
+       case AARCH64_INSN_IMM_ADR:
+               shift = 0;
+               immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK;
+               immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK;
+               insn = (immhi << ADR_IMM_HILOSPLIT) | immlo;
+               mask = ADR_IMM_SIZE - 1;
+               break;
+       default:
+               if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+                       /* __func__ keeps the message in step with the name. */
+                       pr_err("%s: unknown immediate encoding %d\n", __func__,
+                              type);
+                       return 0;
+               }
+               break;
+       }
+
+       return (insn >> shift) & mask;
+}
+
+/*
+ * Return @insn with its immediate field of kind @type replaced by @imm.
+ *
+ * Only the low bits of @imm that fit the field are used; higher bits are
+ * silently dropped. For AARCH64_INSN_IMM_ADR the value is split into the
+ * immlo/immhi parts before insertion.
+ *
+ * AARCH64_BREAK_FAULT is passed through unchanged so that an earlier
+ * encoding failure propagates, and is also returned (after logging an
+ * error) if @type is not a known encoding.
+ */
+u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+                                 u32 insn, u64 imm)
+{
+       u32 immlo, immhi, mask;
+       int shift;
+
+       if (insn == AARCH64_BREAK_FAULT)
+               return AARCH64_BREAK_FAULT;
+
+       switch (type) {
+       case AARCH64_INSN_IMM_ADR:
+               shift = 0;
+               immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
+               imm >>= ADR_IMM_HILOSPLIT;
+               immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT;
+               imm = immlo | immhi;
+               mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
+                       (ADR_IMM_HIMASK << ADR_IMM_HISHIFT));
+               break;
+       default:
+               if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+                       /* __func__ keeps the message in step with the name. */
+                       pr_err("%s: unknown immediate encoding %d\n", __func__,
+                              type);
+                       return AARCH64_BREAK_FAULT;
+               }
+               break;
+       }
+
+       /* Update the immediate field. */
+       insn &= ~(mask << shift);
+       insn |= (imm & mask) << shift;
+
+       return insn;
+}
+
+/*
+ * Extract the 5-bit register number held in the @type field of @insn.
+ *
+ * The accepted field types and bit positions mirror those of
+ * aarch64_insn_encode_register(), including AARCH64_INSN_REGTYPE_RS, which
+ * shares bit 16 with RM.
+ *
+ * Returns 0 (after logging an error) if @type is not a known field.
+ */
+u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type,
+                                       u32 insn)
+{
+       int shift;
+
+       switch (type) {
+       case AARCH64_INSN_REGTYPE_RT:
+       case AARCH64_INSN_REGTYPE_RD:
+               shift = 0;
+               break;
+       case AARCH64_INSN_REGTYPE_RN:
+               shift = 5;
+               break;
+       case AARCH64_INSN_REGTYPE_RT2:
+       case AARCH64_INSN_REGTYPE_RA:
+               shift = 10;
+               break;
+       case AARCH64_INSN_REGTYPE_RM:
+       case AARCH64_INSN_REGTYPE_RS:
+               shift = 16;
+               break;
+       default:
+               pr_err("%s: unknown register type encoding %d\n", __func__,
+                      type);
+               return 0;
+       }
+
+       return (insn >> shift) & GENMASK(4, 0);
+}
+
+/*
+ * Return @insn with the 5-bit register field selected by @type set to @reg.
+ *
+ * AARCH64_BREAK_FAULT is passed through unchanged, and is also returned
+ * (after logging an error) when @reg lies outside
+ * [AARCH64_INSN_REG_0, AARCH64_INSN_REG_SP] or @type is not a known field.
+ */
+static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type,
+                                       u32 insn,
+                                       enum aarch64_insn_register reg)
+{
+       int lsb;
+
+       /* Propagate an earlier encoding failure. */
+       if (insn == AARCH64_BREAK_FAULT)
+               return AARCH64_BREAK_FAULT;
+
+       if (reg < AARCH64_INSN_REG_0 || reg > AARCH64_INSN_REG_SP) {
+               pr_err("%s: unknown register encoding %d\n", __func__, reg);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       switch (type) {
+       case AARCH64_INSN_REGTYPE_RT:
+       case AARCH64_INSN_REGTYPE_RD:
+               lsb = 0;
+               break;
+       case AARCH64_INSN_REGTYPE_RN:
+               lsb = 5;
+               break;
+       case AARCH64_INSN_REGTYPE_RT2:
+       case AARCH64_INSN_REGTYPE_RA:
+               lsb = 10;
+               break;
+       case AARCH64_INSN_REGTYPE_RM:
+       case AARCH64_INSN_REGTYPE_RS:
+               lsb = 16;
+               break;
+       default:
+               pr_err("%s: unknown register type encoding %d\n", __func__,
+                      type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* Clear the old field, then drop the new register number in. */
+       return (insn & ~(GENMASK(4, 0) << lsb)) | (reg << lsb);
+}
+
+/*
+ * Return @insn with bits [31:30] set to the code for access size @type
+ * (8-bit -> 0, 16-bit -> 1, 32-bit -> 2, 64-bit -> 3).
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging an error) for any other @type.
+ */
+static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type,
+                                        u32 insn)
+{
+       u32 code;
+
+       switch (type) {
+       case AARCH64_INSN_SIZE_8:
+               code = 0;
+               break;
+       case AARCH64_INSN_SIZE_16:
+               code = 1;
+               break;
+       case AARCH64_INSN_SIZE_32:
+               code = 2;
+               break;
+       case AARCH64_INSN_SIZE_64:
+               code = 3;
+               break;
+       default:
+               pr_err("%s: unknown size encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       return (insn & ~GENMASK(31, 30)) | (code << 30);
+}
+
+/*
+ * Compute the byte offset from @pc to @addr for an immediate branch.
+ *
+ * Returns the offset when both addresses are 4-byte aligned and the offset
+ * lies in [-@range, @range). Otherwise logs an error and returns @range
+ * itself, which callers detect with an "offset >= range" test.
+ */
+static inline long branch_imm_common(unsigned long pc, unsigned long addr,
+                                    long range)
+{
+       long delta;
+
+       /* Either address having a low bit set means it is misaligned. */
+       if ((pc | addr) & 0x3) {
+               pr_err("%s: A64 instructions must be word aligned\n", __func__);
+               return range;
+       }
+
+       delta = (long)addr - (long)pc;
+       if (delta >= -range && delta < range)
+               return delta;
+
+       pr_err("%s: offset out of range\n", __func__);
+       return range;
+}
+
+/*
+ * Build a B (AARCH64_INSN_BRANCH_NOLINK) or BL (AARCH64_INSN_BRANCH_LINK)
+ * from @pc to @addr.
+ *
+ * Returns AARCH64_BREAK_FAULT if the target fails the alignment/range check
+ * in branch_imm_common() or @type is neither of the two.
+ */
+u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
+                                         enum aarch64_insn_branch_type type)
+{
+       /*
+        * B/BL support [-128M, 128M) offset
+        * ARM64 virtual address arrangement guarantees all kernel and module
+        * texts are within +/-128M.
+        */
+       long offset = branch_imm_common(pc, addr, SZ_128M);
+       u32 insn;
+
+       if (offset >= SZ_128M)
+               return AARCH64_BREAK_FAULT;
+
+       if (type == AARCH64_INSN_BRANCH_LINK) {
+               insn = aarch64_insn_get_bl_value();
+       } else if (type == AARCH64_INSN_BRANCH_NOLINK) {
+               insn = aarch64_insn_get_b_value();
+       } else {
+               pr_err("%s: unknown branch encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* The immediate counts 4-byte instructions, not bytes. */
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn,
+                                            offset >> 2);
+}
+
+/*
+ * Build a CBZ (AARCH64_INSN_BRANCH_COMP_ZERO) or CBNZ
+ * (AARCH64_INSN_BRANCH_COMP_NONZERO) on @reg, branching from @pc to @addr.
+ *
+ * Returns AARCH64_BREAK_FAULT if the target fails the alignment/range check
+ * in branch_imm_common(), or if @type or @variant is not recognised.
+ */
+u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr,
+                                    enum aarch64_insn_register reg,
+                                    enum aarch64_insn_variant variant,
+                                    enum aarch64_insn_branch_type type)
+{
+       long offset = branch_imm_common(pc, addr, SZ_1M);
+       u32 insn;
+
+       /* branch_imm_common() hands back the range itself on failure. */
+       if (offset >= SZ_1M)
+               return AARCH64_BREAK_FAULT;
+
+       if (type == AARCH64_INSN_BRANCH_COMP_ZERO) {
+               insn = aarch64_insn_get_cbz_value();
+       } else if (type == AARCH64_INSN_BRANCH_COMP_NONZERO) {
+               insn = aarch64_insn_get_cbnz_value();
+       } else {
+               pr_err("%s: unknown branch encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* Only the 64-bit variant needs a bit set; 32-bit is the base form. */
+       if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               insn |= AARCH64_INSN_SF_BIT;
+       } else if (variant != AARCH64_INSN_VARIANT_32BIT) {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg);
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
+                                            offset >> 2);
+}
+
+/*
+ * Build a conditional branch from @pc to @addr, taken when @cond holds.
+ *
+ * Returns AARCH64_BREAK_FAULT if either address is not 4-byte aligned, if
+ * the offset lies outside [-1M, 1M), or if @cond is outside
+ * [AARCH64_INSN_COND_EQ, AARCH64_INSN_COND_AL].
+ */
+u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr,
+                                    enum aarch64_insn_condition cond)
+{
+       u32 insn;
+       long offset;
+
+       /*
+        * branch_imm_common() returns the range itself on failure. Without
+        * this check SZ_1M >> 2 would be masked into the 19-bit field and
+        * yield a branch to an unrelated address.
+        */
+       offset = branch_imm_common(pc, addr, SZ_1M);
+       if (offset >= SZ_1M)
+               return AARCH64_BREAK_FAULT;
+
+       insn = aarch64_insn_get_bcond_value();
+
+       if (cond < AARCH64_INSN_COND_EQ || cond > AARCH64_INSN_COND_AL) {
+               pr_err("%s: unknown condition encoding %d\n", __func__, cond);
+               return AARCH64_BREAK_FAULT;
+       }
+       insn |= cond;
+
+       /* The immediate counts 4-byte instructions, not bytes. */
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn,
+                                            offset >> 2);
+}
+
+/* Build a hint instruction by OR-ing @op into the base hint encoding. */
+u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op)
+{
+       u32 insn = aarch64_insn_get_hint_value();
+
+       insn |= op;
+
+       return insn;
+}
+
+/* Build the hint instruction selected by AARCH64_INSN_HINT_NOP. */
+u32 __kprobes aarch64_insn_gen_nop(void)
+{
+       u32 nop = aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP);
+
+       return nop;
+}
+
+/*
+ * Build a register branch through @reg: BR for AARCH64_INSN_BRANCH_NOLINK,
+ * BLR for AARCH64_INSN_BRANCH_LINK, RET for AARCH64_INSN_BRANCH_RETURN.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging an error) for any other @type.
+ */
+u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg,
+                               enum aarch64_insn_branch_type type)
+{
+       u32 insn;
+
+       if (type == AARCH64_INSN_BRANCH_NOLINK) {
+               insn = aarch64_insn_get_br_value();
+       } else if (type == AARCH64_INSN_BRANCH_LINK) {
+               insn = aarch64_insn_get_blr_value();
+       } else if (type == AARCH64_INSN_BRANCH_RETURN) {
+               insn = aarch64_insn_get_ret_value();
+       } else {
+               pr_err("%s: unknown branch encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* The branch target register goes in the Rn field. */
+       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, reg);
+}
+
+/*
+ * Build a register-offset load or store of @size: @reg is the transfer
+ * register (Rt), @base the base register (Rn), @offset the offset register
+ * (Rm).
+ *
+ * Returns AARCH64_BREAK_FAULT if @type is not one of the two register-offset
+ * forms; failures in the size/register encoders propagate the same value.
+ */
+u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg,
+                                   enum aarch64_insn_register base,
+                                   enum aarch64_insn_register offset,
+                                   enum aarch64_insn_size_type size,
+                                   enum aarch64_insn_ldst_type type)
+{
+       u32 opcode;
+
+       if (type == AARCH64_INSN_LDST_LOAD_REG_OFFSET) {
+               opcode = aarch64_insn_get_ldr_reg_value();
+       } else if (type == AARCH64_INSN_LDST_STORE_REG_OFFSET) {
+               opcode = aarch64_insn_get_str_reg_value();
+       } else {
+               pr_err("%s: unknown load/store encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       opcode = aarch64_insn_encode_ldst_size(size, opcode);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, opcode,
+                                             reg);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, opcode,
+                                             base);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, opcode,
+                                             offset);
+
+       return opcode;
+}
+
+/*
+ * Build a pre- or post-indexed LDP/STP of @reg1 and @reg2 at @base with
+ * byte offset @offset.
+ *
+ * @offset must be a multiple of the register size and fit the scaled 7-bit
+ * immediate: [-256, 252] in steps of 4 for the 32-bit variant, [-512, 504]
+ * in steps of 8 for the 64-bit variant.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging an error) for an unknown @type
+ * or @variant or an unacceptable @offset.
+ */
+u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
+                                    enum aarch64_insn_register reg2,
+                                    enum aarch64_insn_register base,
+                                    int offset,
+                                    enum aarch64_insn_variant variant,
+                                    enum aarch64_insn_ldst_type type)
+{
+       u32 opcode;
+       int scale;
+
+       switch (type) {
+       case AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX:
+               opcode = aarch64_insn_get_ldp_pre_value();
+               break;
+       case AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX:
+               opcode = aarch64_insn_get_stp_pre_value();
+               break;
+       case AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX:
+               opcode = aarch64_insn_get_ldp_post_value();
+               break;
+       case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX:
+               opcode = aarch64_insn_get_stp_post_value();
+               break;
+       default:
+               pr_err("%s: unknown load/store encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (variant == AARCH64_INSN_VARIANT_32BIT) {
+               if ((offset & 0x3) || (offset < -256) || (offset > 252)) {
+                       pr_err("%s: offset must be multiples of 4 in the range of [-256, 252] %d\n",
+                              __func__, offset);
+                       return AARCH64_BREAK_FAULT;
+               }
+               scale = 2;
+       } else if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               if ((offset & 0x7) || (offset < -512) || (offset > 504)) {
+                       pr_err("%s: offset must be multiples of 8 in the range of [-512, 504] %d\n",
+                              __func__, offset);
+                       return AARCH64_BREAK_FAULT;
+               }
+               scale = 3;
+               opcode |= AARCH64_INSN_SF_BIT;
+       } else {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, opcode,
+                                             reg1);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, opcode,
+                                             reg2);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, opcode,
+                                             base);
+
+       /* The immediate is the byte offset divided by the register size. */
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_7, opcode,
+                                            offset >> scale);
+}
+
+/*
+ * Build a load-exclusive or store-exclusive of @size: @reg is Rt, @base is
+ * Rn, @state goes in the Rs field, and Rt2 is always set to
+ * AARCH64_INSN_REG_ZR.
+ *
+ * Returns AARCH64_BREAK_FAULT if @type is neither exclusive form; failures
+ * in the size/register encoders propagate the same value.
+ */
+u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
+                                  enum aarch64_insn_register base,
+                                  enum aarch64_insn_register state,
+                                  enum aarch64_insn_size_type size,
+                                  enum aarch64_insn_ldst_type type)
+{
+       u32 opcode;
+
+       if (type == AARCH64_INSN_LDST_LOAD_EX) {
+               opcode = aarch64_insn_get_load_ex_value();
+       } else if (type == AARCH64_INSN_LDST_STORE_EX) {
+               opcode = aarch64_insn_get_store_ex_value();
+       } else {
+               pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       opcode = aarch64_insn_encode_ldst_size(size, opcode);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, opcode,
+                                             reg);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, opcode,
+                                             base);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT2, opcode,
+                                             AARCH64_INSN_REG_ZR);
+       opcode = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, opcode,
+                                             state);
+
+       return opcode;
+}
+
+/*
+ * Build an LDADD instruction.
+ *
+ * @result is placed in Rt, @address in Rn and @value in Rs. Only
+ * AARCH64_INSN_SIZE_32 and AARCH64_INSN_SIZE_64 are accepted; any other
+ * @size is logged and AARCH64_BREAK_FAULT is returned.
+ */
+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+                          enum aarch64_insn_register address,
+                          enum aarch64_insn_register value,
+                          enum aarch64_insn_size_type size)
+{
+       u32 ldadd = aarch64_insn_get_ldadd_value();
+
+       if (size != AARCH64_INSN_SIZE_32 && size != AARCH64_INSN_SIZE_64) {
+               pr_err("%s: unimplemented size encoding %d\n", __func__, size);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       ldadd = aarch64_insn_encode_ldst_size(size, ldadd);
+       ldadd = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, ldadd,
+                                            result);
+       ldadd = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, ldadd,
+                                            address);
+
+       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, ldadd,
+                                           value);
+}
+
+/*
+ * Build a STADD instruction.
+ *
+ * STADD has no encoding of its own: it is LDADD whose destination
+ * register is XZR, so the work is delegated to aarch64_insn_gen_ldadd().
+ */
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+                          enum aarch64_insn_register value,
+                          enum aarch64_insn_size_type size)
+{
+       const enum aarch64_insn_register discard = AARCH64_INSN_REG_ZR;
+
+       return aarch64_insn_gen_ldadd(discard, address, value, size);
+}
+
+/*
+ * Fold the prefetch operation (@type, @target, @policy) into the low five
+ * bits of @insn, which for this instruction live in the Rt field.
+ *
+ * Bit layout of the 5-bit value: policy in bit 0, target in bits 2:1 and
+ * type in bits 4:3. Any unknown enumerator is logged and
+ * AARCH64_BREAK_FAULT is returned.
+ */
+static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
+                                       enum aarch64_insn_prfm_target target,
+                                       enum aarch64_insn_prfm_policy policy,
+                                       u32 insn)
+{
+       u32 prfop = 0;
+
+       /* Type occupies bits 4:3 */
+       switch (type) {
+       case AARCH64_INSN_PRFM_TYPE_PLD:
+               break;
+       case AARCH64_INSN_PRFM_TYPE_PLI:
+               prfop |= BIT(0) << 3;
+               break;
+       case AARCH64_INSN_PRFM_TYPE_PST:
+               prfop |= BIT(1) << 3;
+               break;
+       default:
+               pr_err("%s: unknown prfm type encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* Target occupies bits 2:1 */
+       switch (target) {
+       case AARCH64_INSN_PRFM_TARGET_L1:
+               break;
+       case AARCH64_INSN_PRFM_TARGET_L2:
+               prfop |= BIT(0) << 1;
+               break;
+       case AARCH64_INSN_PRFM_TARGET_L3:
+               prfop |= BIT(1) << 1;
+               break;
+       default:
+               pr_err("%s: unknown prfm target encoding %d\n", __func__, target);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* Policy occupies bit 0 */
+       switch (policy) {
+       case AARCH64_INSN_PRFM_POLICY_KEEP:
+               break;
+       case AARCH64_INSN_PRFM_POLICY_STRM:
+               prfop |= BIT(0);
+               break;
+       default:
+               pr_err("%s: unknown prfm policy encoding %d\n", __func__, policy);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* In this case, imm5 is encoded into Rt field. */
+       return (insn & ~GENMASK(4, 0)) | prfop;
+}
+
+/*
+ * Build a PRFM instruction addressing @base with a zero 12-bit immediate.
+ *
+ * The size field is fixed to AARCH64_INSN_SIZE_64 and the prefetch
+ * operation is derived from @type, @target and @policy by
+ * aarch64_insn_encode_prfm_imm().
+ */
+u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
+                             enum aarch64_insn_prfm_type type,
+                             enum aarch64_insn_prfm_target target,
+                             enum aarch64_insn_prfm_policy policy)
+{
+       u32 prfm;
+
+       prfm = aarch64_insn_encode_ldst_size(AARCH64_INSN_SIZE_64,
+                                            aarch64_insn_get_prfm_value());
+       prfm = aarch64_insn_encode_prfm_imm(type, target, policy, prfm);
+       prfm = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, prfm,
+                                           base);
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, prfm, 0);
+}
+
+/*
+ * Build an ADD/SUB/ADDS/SUBS (immediate) instruction: @dst = @src op @imm.
+ *
+ * @imm must fit in 24 bits and be expressible either as a plain 12-bit
+ * value or as a 12-bit value shifted left by 12; in the latter case the
+ * LSL #12 flag is set and the shifted-down value is encoded.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging) for an unknown @type or
+ * @variant, or for an immediate that cannot be encoded.
+ */
+u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
+                                enum aarch64_insn_register src,
+                                int imm, enum aarch64_insn_variant variant,
+                                enum aarch64_insn_adsb_type type)
+{
+       bool upper_used, lower_used;
+       u32 insn;
+
+       switch (type) {
+       case AARCH64_INSN_ADSB_ADD:
+               insn = aarch64_insn_get_add_imm_value();
+               break;
+       case AARCH64_INSN_ADSB_SUB:
+               insn = aarch64_insn_get_sub_imm_value();
+               break;
+       case AARCH64_INSN_ADSB_ADD_SETFLAGS:
+               insn = aarch64_insn_get_adds_imm_value();
+               break;
+       case AARCH64_INSN_ADSB_SUB_SETFLAGS:
+               insn = aarch64_insn_get_subs_imm_value();
+               break;
+       default:
+               pr_err("%s: unknown add/sub encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               insn |= AARCH64_INSN_SF_BIT;
+       } else if (variant != AARCH64_INSN_VARIANT_32BIT) {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       upper_used = imm & ~(SZ_4K - 1);
+       lower_used = imm & (SZ_4K - 1);
+
+       /*
+        * We can't encode more than a 24bit value (12bit + 12bit shift),
+        * nor a value that has bits set in both 12-bit halves.
+        */
+       if ((imm & ~(BIT(24) - 1)) || (upper_used && lower_used)) {
+               pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* Only the top 12 bits are populated: encode them with LSL #12 */
+       if (upper_used) {
+               imm >>= 12;
+               insn |= AARCH64_INSN_LSL_12;
+       }
+
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm);
+}
+
+/*
+ * Build a BFM/UBFM/SBFM instruction with the given @immr and @imms.
+ *
+ * @immr and @imms must fit in 5 bits for the 32-bit variant and in 6 bits
+ * for the 64-bit variant; the 64-bit variant additionally sets both the
+ * SF and N bits.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging) for an unknown @type or
+ * @variant, or an out-of-range @immr/@imms.
+ */
+u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
+                             enum aarch64_insn_register src,
+                             int immr, int imms,
+                             enum aarch64_insn_variant variant,
+                             enum aarch64_insn_bitfield_type type)
+{
+       u32 insn, field_mask;
+
+       if (type == AARCH64_INSN_BITFIELD_MOVE) {
+               insn = aarch64_insn_get_bfm_value();
+       } else if (type == AARCH64_INSN_BITFIELD_MOVE_UNSIGNED) {
+               insn = aarch64_insn_get_ubfm_value();
+       } else if (type == AARCH64_INSN_BITFIELD_MOVE_SIGNED) {
+               insn = aarch64_insn_get_sbfm_value();
+       } else {
+               pr_err("%s: unknown bitfield encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (variant == AARCH64_INSN_VARIANT_32BIT) {
+               field_mask = GENMASK(4, 0);
+       } else if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               insn |= AARCH64_INSN_SF_BIT | AARCH64_INSN_N_BIT;
+               field_mask = GENMASK(5, 0);
+       } else {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (immr & ~field_mask) {
+               pr_err("%s: invalid immr encoding %d\n", __func__, immr);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (imms & ~field_mask) {
+               pr_err("%s: invalid imms encoding %d\n", __func__, imms);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
+}
+
+/*
+ * Build a MOVZ/MOVK/MOVN instruction loading the 16-bit @imm into @dst at
+ * bit position @shift.
+ *
+ * @imm must fit in 16 bits. @shift must be a multiple of 16: 0 or 16 for
+ * the 32-bit variant, 0 to 48 for the 64-bit variant. It is stored as
+ * (shift / 16) starting at bit 21 of the instruction.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging) for an unknown @type or
+ * @variant, or an invalid @imm or @shift.
+ */
+u32 aarch64_insn_gen_movewide(enum aarch64_insn_register dst,
+                             int imm, int shift,
+                             enum aarch64_insn_variant variant,
+                             enum aarch64_insn_movewide_type type)
+{
+       int max_shift;
+       u32 insn;
+
+       if (type == AARCH64_INSN_MOVEWIDE_ZERO) {
+               insn = aarch64_insn_get_movz_value();
+       } else if (type == AARCH64_INSN_MOVEWIDE_KEEP) {
+               insn = aarch64_insn_get_movk_value();
+       } else if (type == AARCH64_INSN_MOVEWIDE_INVERSE) {
+               insn = aarch64_insn_get_movn_value();
+       } else {
+               pr_err("%s: unknown movewide encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (imm & ~(SZ_64K - 1)) {
+               pr_err("%s: invalid immediate encoding %d\n", __func__, imm);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       switch (variant) {
+       case AARCH64_INSN_VARIANT_32BIT:
+               max_shift = 16;
+               break;
+       case AARCH64_INSN_VARIANT_64BIT:
+               insn |= AARCH64_INSN_SF_BIT;
+               max_shift = 48;
+               break;
+       default:
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* Only multiples of 16 within the register width are encodable */
+       if (shift < 0 || shift > max_shift || (shift & 0xf)) {
+               pr_err("%s: invalid shift encoding %d\n", __func__,
+                      shift);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       insn |= (shift >> 4) << 21;
+
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
+}
+
+/*
+ * Build an ADD/SUB/ADDS/SUBS (shifted register) instruction:
+ * @dst = @src op (@reg shifted by @shift).
+ *
+ * @shift must be in [0, 31] for the 32-bit variant and in [0, 63] for the
+ * 64-bit variant; it is encoded in the imm6 field.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging) for an unknown @type or
+ * @variant, or an out-of-range @shift.
+ */
+u32 aarch64_insn_gen_add_sub_shifted_reg(enum aarch64_insn_register dst,
+                                        enum aarch64_insn_register src,
+                                        enum aarch64_insn_register reg,
+                                        int shift,
+                                        enum aarch64_insn_variant variant,
+                                        enum aarch64_insn_adsb_type type)
+{
+       int reg_bits;
+       u32 insn;
+
+       if (type == AARCH64_INSN_ADSB_ADD) {
+               insn = aarch64_insn_get_add_value();
+       } else if (type == AARCH64_INSN_ADSB_SUB) {
+               insn = aarch64_insn_get_sub_value();
+       } else if (type == AARCH64_INSN_ADSB_ADD_SETFLAGS) {
+               insn = aarch64_insn_get_adds_value();
+       } else if (type == AARCH64_INSN_ADSB_SUB_SETFLAGS) {
+               insn = aarch64_insn_get_subs_value();
+       } else {
+               pr_err("%s: unknown add/sub encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       switch (variant) {
+       case AARCH64_INSN_VARIANT_32BIT:
+               reg_bits = SZ_32;
+               break;
+       case AARCH64_INSN_VARIANT_64BIT:
+               insn |= AARCH64_INSN_SF_BIT;
+               reg_bits = SZ_64;
+               break;
+       default:
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* The shift amount must be smaller than the register width */
+       if (shift & ~(reg_bits - 1)) {
+               pr_err("%s: invalid shift encoding %d\n", __func__,
+                      shift);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, reg);
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
+}
+
+/*
+ * Build a one-source data-processing instruction (REV16/REV32/REV64)
+ * operating on @src and writing @dst.
+ *
+ * AARCH64_INSN_DATA1_REVERSE_64 is only accepted together with the 64-bit
+ * variant. Returns AARCH64_BREAK_FAULT (after logging) for that mismatch
+ * or for an unknown @type or @variant.
+ */
+u32 aarch64_insn_gen_data1(enum aarch64_insn_register dst,
+                          enum aarch64_insn_register src,
+                          enum aarch64_insn_variant variant,
+                          enum aarch64_insn_data1_type type)
+{
+       u32 insn;
+
+       if (type == AARCH64_INSN_DATA1_REVERSE_16) {
+               insn = aarch64_insn_get_rev16_value();
+       } else if (type == AARCH64_INSN_DATA1_REVERSE_32) {
+               insn = aarch64_insn_get_rev32_value();
+       } else if (type == AARCH64_INSN_DATA1_REVERSE_64) {
+               if (variant != AARCH64_INSN_VARIANT_64BIT) {
+                       pr_err("%s: invalid variant for reverse64 %d\n",
+                              __func__, variant);
+                       return AARCH64_BREAK_FAULT;
+               }
+               insn = aarch64_insn_get_rev64_value();
+       } else {
+               pr_err("%s: unknown data1 encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               insn |= AARCH64_INSN_SF_BIT;
+       } else if (variant != AARCH64_INSN_VARIANT_32BIT) {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, dst);
+
+       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, src);
+}
+
+/*
+ * Build a two-source data-processing instruction
+ * (UDIV/SDIV/LSLV/LSRV/ASRV/RORV): @dst = @src op @reg.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging) for an unknown @type or
+ * @variant.
+ */
+u32 aarch64_insn_gen_data2(enum aarch64_insn_register dst,
+                          enum aarch64_insn_register src,
+                          enum aarch64_insn_register reg,
+                          enum aarch64_insn_variant variant,
+                          enum aarch64_insn_data2_type type)
+{
+       u32 opc;
+
+       switch (type) {
+       case AARCH64_INSN_DATA2_UDIV:
+               opc = aarch64_insn_get_udiv_value();
+               break;
+       case AARCH64_INSN_DATA2_SDIV:
+               opc = aarch64_insn_get_sdiv_value();
+               break;
+       case AARCH64_INSN_DATA2_LSLV:
+               opc = aarch64_insn_get_lslv_value();
+               break;
+       case AARCH64_INSN_DATA2_LSRV:
+               opc = aarch64_insn_get_lsrv_value();
+               break;
+       case AARCH64_INSN_DATA2_ASRV:
+               opc = aarch64_insn_get_asrv_value();
+               break;
+       case AARCH64_INSN_DATA2_RORV:
+               opc = aarch64_insn_get_rorv_value();
+               break;
+       default:
+               pr_err("%s: unknown data2 encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               opc |= AARCH64_INSN_SF_BIT;
+       } else if (variant != AARCH64_INSN_VARIANT_32BIT) {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, opc, dst);
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, opc, src);
+
+       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, opc, reg);
+}
+
+/*
+ * Build a three-source data-processing instruction (MADD/MSUB).
+ *
+ * @dst is placed in Rd, @src in Ra, @reg1 in Rn and @reg2 in Rm.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging) for an unknown @type or
+ * @variant.
+ */
+u32 aarch64_insn_gen_data3(enum aarch64_insn_register dst,
+                          enum aarch64_insn_register src,
+                          enum aarch64_insn_register reg1,
+                          enum aarch64_insn_register reg2,
+                          enum aarch64_insn_variant variant,
+                          enum aarch64_insn_data3_type type)
+{
+       u32 opc;
+
+       if (type == AARCH64_INSN_DATA3_MADD) {
+               opc = aarch64_insn_get_madd_value();
+       } else if (type == AARCH64_INSN_DATA3_MSUB) {
+               opc = aarch64_insn_get_msub_value();
+       } else {
+               pr_err("%s: unknown data3 encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               opc |= AARCH64_INSN_SF_BIT;
+       } else if (variant != AARCH64_INSN_VARIANT_32BIT) {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, opc, dst);
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RA, opc, src);
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, opc, reg1);
+
+       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, opc,
+                                           reg2);
+}
+
+/*
+ * Build a logical (shifted register) instruction
+ * (AND/BIC/ORR/ORN/EOR/EON/ANDS/BICS): @dst = @src op (@reg shifted by
+ * @shift).
+ *
+ * @shift must be in [0, 31] for the 32-bit variant and in [0, 63] for the
+ * 64-bit variant; it is encoded in the imm6 field.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging) for an unknown @type or
+ * @variant, or an out-of-range @shift.
+ */
+u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
+                                        enum aarch64_insn_register src,
+                                        enum aarch64_insn_register reg,
+                                        int shift,
+                                        enum aarch64_insn_variant variant,
+                                        enum aarch64_insn_logic_type type)
+{
+       int reg_bits;
+       u32 opc;
+
+       switch (type) {
+       case AARCH64_INSN_LOGIC_AND:
+               opc = aarch64_insn_get_and_value();
+               break;
+       case AARCH64_INSN_LOGIC_BIC:
+               opc = aarch64_insn_get_bic_value();
+               break;
+       case AARCH64_INSN_LOGIC_ORR:
+               opc = aarch64_insn_get_orr_value();
+               break;
+       case AARCH64_INSN_LOGIC_ORN:
+               opc = aarch64_insn_get_orn_value();
+               break;
+       case AARCH64_INSN_LOGIC_EOR:
+               opc = aarch64_insn_get_eor_value();
+               break;
+       case AARCH64_INSN_LOGIC_EON:
+               opc = aarch64_insn_get_eon_value();
+               break;
+       case AARCH64_INSN_LOGIC_AND_SETFLAGS:
+               opc = aarch64_insn_get_ands_value();
+               break;
+       case AARCH64_INSN_LOGIC_BIC_SETFLAGS:
+               opc = aarch64_insn_get_bics_value();
+               break;
+       default:
+               pr_err("%s: unknown logical encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (variant == AARCH64_INSN_VARIANT_32BIT) {
+               reg_bits = SZ_32;
+       } else if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               opc |= AARCH64_INSN_SF_BIT;
+               reg_bits = SZ_64;
+       } else {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       /* The shift amount must be smaller than the register width */
+       if (shift & ~(reg_bits - 1)) {
+               pr_err("%s: invalid shift encoding %d\n", __func__,
+                      shift);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, opc, dst);
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, opc, src);
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, opc, reg);
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, opc, shift);
+}
+
+/*
+ * Build a register-to-register MOV.
+ *
+ * Architecturally, MOV (register) is an alias of ORR (shifted register):
+ * MOV <*d>, <*m> is the same as ORR <*d>, <*ZR>, <*m> with a zero shift,
+ * which is exactly what is generated here.
+ */
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+                             enum aarch64_insn_register src,
+                             enum aarch64_insn_variant variant)
+{
+       const int no_shift = 0;
+
+       return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR,
+                                                   src, no_shift, variant,
+                                                   AARCH64_INSN_LOGIC_ORR);
+}
+
+/*
+ * Build an ADR or ADRP instruction, located at @pc, that loads @addr
+ * (ADR) or the 4K page of @addr (ADRP) into @reg.
+ *
+ * For ADR the encoded offset is @addr - @pc in bytes; for ADRP it is the
+ * distance from the 4K-aligned @pc, expressed in 4K pages. Either way the
+ * offset must lie in [-SZ_1M, SZ_1M).
+ *
+ * Returns AARCH64_BREAK_FAULT for an unknown @type (logged) or an
+ * out-of-range offset (not logged).
+ */
+u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
+                        enum aarch64_insn_register reg,
+                        enum aarch64_insn_adr_type type)
+{
+       s32 disp;
+       u32 opc;
+
+       if (type == AARCH64_INSN_ADR_TYPE_ADR) {
+               opc = aarch64_insn_get_adr_value();
+               disp = addr - pc;
+       } else if (type == AARCH64_INSN_ADR_TYPE_ADRP) {
+               opc = aarch64_insn_get_adrp_value();
+               disp = (addr - ALIGN_DOWN(pc, SZ_4K)) >> 12;
+       } else {
+               pr_err("%s: unknown adr encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       if (!(disp >= -SZ_1M && disp < SZ_1M))
+               return AARCH64_BREAK_FAULT;
+
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, opc, reg);
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, opc, disp);
+}
+
+/*
+ * Extract the immediate of a branch instruction and return it as a signed
+ * byte offset, suitable for computing a new branch target.
+ *
+ * Handles B/BL (26-bit immediate), CBZ/CBNZ/B.cond (19-bit) and TBZ/TBNZ
+ * (14-bit). Any other instruction hits BUG().
+ */
+s32 aarch64_get_branch_offset(u32 insn)
+{
+       enum aarch64_insn_imm_type imm_type;
+       unsigned int width;
+       s32 imm;
+
+       if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
+               imm_type = AARCH64_INSN_IMM_26;
+               width = 26;
+       } else if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
+                  aarch64_insn_is_bcond(insn)) {
+               imm_type = AARCH64_INSN_IMM_19;
+               width = 19;
+       } else if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn)) {
+               imm_type = AARCH64_INSN_IMM_14;
+               width = 14;
+       } else {
+               /* Unhandled instruction */
+               BUG();
+       }
+
+       imm = aarch64_insn_decode_immediate(imm_type, insn);
+
+       /*
+        * Move the field's top bit into bit 31, then shift back down two
+        * places short so the result is sign-extended and scaled by 4.
+        */
+       return (imm << (32 - width)) >> (30 - width);
+}
+
+/*
+ * Store the byte displacement @offset into the immediate field of the
+ * branch instruction @insn and return the updated instruction.
+ *
+ * Handles B/BL (26-bit immediate), CBZ/CBNZ/B.cond (19-bit) and TBZ/TBNZ
+ * (14-bit); the value encoded is @offset >> 2. Any other instruction hits
+ * BUG().
+ */
+u32 aarch64_set_branch_offset(u32 insn, s32 offset)
+{
+       enum aarch64_insn_imm_type imm_type;
+
+       if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) {
+               imm_type = AARCH64_INSN_IMM_26;
+       } else if (aarch64_insn_is_cbz(insn) || aarch64_insn_is_cbnz(insn) ||
+                  aarch64_insn_is_bcond(insn)) {
+               imm_type = AARCH64_INSN_IMM_19;
+       } else if (aarch64_insn_is_tbz(insn) || aarch64_insn_is_tbnz(insn)) {
+               imm_type = AARCH64_INSN_IMM_14;
+       } else {
+               /* Unhandled instruction */
+               BUG();
+       }
+
+       return aarch64_insn_encode_immediate(imm_type, insn, offset >> 2);
+}
+
+/*
+ * Return the ADR immediate of the ADRP instruction @insn, shifted left by
+ * 12. BUG()s if @insn is not an ADRP.
+ */
+s32 aarch64_insn_adrp_get_offset(u32 insn)
+{
+       BUG_ON(!aarch64_insn_is_adrp(insn));
+
+       return aarch64_insn_decode_immediate(AARCH64_INSN_IMM_ADR,
+                                            insn) << 12;
+}
+
+/*
+ * Encode @offset, shifted right by 12, into the ADR immediate of the ADRP
+ * instruction @insn and return the result. BUG()s if @insn is not an ADRP.
+ */
+u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset)
+{
+       s32 pages = offset >> 12;
+
+       BUG_ON(!aarch64_insn_is_adrp(insn));
+
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn, pages);
+}
+
+/*
+ * Extract the Op/CR data from a msr/mrs instruction: the 16-bit field
+ * held in bits [20:5] of @insn.
+ */
+u32 aarch64_insn_extract_system_reg(u32 insn)
+{
+       return (insn >> 5) & 0xFFFF;
+}
+
+/*
+ * Report whether @insn is "wide", i.e. its value is 0xe800 or above.
+ * NOTE(review): presumably @insn is the first halfword of a T32
+ * instruction - confirm against callers.
+ */
+bool aarch32_insn_is_wide(u32 insn)
+{
+       const u32 first_wide = 0xe800;
+
+       return first_wide <= insn;
+}
+
+/*
+ * Extract a register number from an instruction: the 4-bit field of @insn
+ * whose least significant bit sits at bit position @offset.
+ */
+u32 aarch32_insn_extract_reg_num(u32 insn, int offset)
+{
+       u32 field = 0xf << offset;
+
+       return (insn & field) >> offset;
+}
+
+#define OPC2_MASK      0x7
+#define OPC2_OFFSET    5
+/* Return the 3-bit opc2 field, bits [7:5], of the MCR instruction @insn. */
+u32 aarch32_insn_mcr_extract_opc2(u32 insn)
+{
+       return (insn >> OPC2_OFFSET) & OPC2_MASK;
+}
+
+#define CRM_MASK       0xf
+/* Return the CRm field, the low four bits, of the MCR instruction @insn. */
+u32 aarch32_insn_mcr_extract_crm(u32 insn)
+{
+       u32 crm = insn & CRM_MASK;
+
+       return crm;
+}
+
+/*
+ * Return true if the set bits of @val form one contiguous run.
+ *
+ * Doesn't handle full ones or full zeroes.
+ */
+static bool range_of_ones(u64 val)
+{
+       /* Drop the trailing zeroes so the run, if any, starts at bit 0 */
+       u64 run = val >> __ffs64(val);
+
+       /*
+        * One of Sean Eron Anderson's bithack tricks: adding one to a
+        * solid run of ones carries past all of them, leaving no bit in
+        * common with the original value.
+        */
+       return !(run & (run + 1));
+}
+
+/*
+ * Encode @imm as an AArch64 logical ("bitmask") immediate and merge the
+ * resulting N:immr:imms fields into @insn.
+ *
+ * @imm:     value to encode; it must not be zero, must not be all ones
+ *           for the chosen width, and must not have bits set above that
+ *           width
+ * @variant: selects a 32-bit or 64-bit operation; the 64-bit variant
+ *           also sets the SF bit in @insn
+ * @insn:    instruction to receive the immediate fields
+ *
+ * The value is reduced to its smallest repeating power-of-two sized
+ * element, which must then be a rotated contiguous run of ones.
+ *
+ * Returns the updated instruction, or AARCH64_BREAK_FAULT if @variant is
+ * unknown (logged) or @imm cannot be encoded (not logged).
+ */
+static u32 aarch64_encode_immediate(u64 imm,
+                                   enum aarch64_insn_variant variant,
+                                   u32 insn)
+{
+       unsigned int immr, imms, n, ones, ror, esz, tmp;
+       u64 mask;
+
+       switch (variant) {
+       case AARCH64_INSN_VARIANT_32BIT:
+               esz = 32;
+               break;
+       case AARCH64_INSN_VARIANT_64BIT:
+               insn |= AARCH64_INSN_SF_BIT;
+               esz = 64;
+               break;
+       default:
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       mask = GENMASK(esz - 1, 0);
+
+       /* Can't encode full zeroes, full ones, or value wider than the mask */
+       if (!imm || imm == mask || imm & ~mask)
+               return AARCH64_BREAK_FAULT;
+
+       /*
+        * Inverse of Replicate(). Try to spot a repeating pattern
+        * with a pow2 stride.
+        */
+       for (tmp = esz / 2; tmp >= 2; tmp /= 2) {
+               u64 emask = BIT(tmp) - 1;
+
+               if ((imm & emask) != ((imm >> tmp) & emask))
+                       break;
+
+               esz = tmp;
+               mask = emask;
+       }
+
+       /* N is only set if we're encoding a 64bit value */
+       n = esz == 64;
+
+       /* Trim imm to the element size */
+       imm &= mask;
+
+       /* That's how many ones we need to encode */
+       ones = hweight64(imm);
+
+       /*
+        * imms is set to (ones - 1), prefixed with a string of ones
+        * and a zero if they fit. Cap it to 6 bits.
+        */
+       imms  = ones - 1;
+       imms |= 0xf << ffs(esz);
+       imms &= BIT(6) - 1;
+
+       /* Compute the rotation */
+       if (range_of_ones(imm)) {
+               /*
+                * Pattern: 0..01..10..0
+                *
+                * Compute how many rotate we need to align it right
+                */
+               ror = __ffs64(imm);
+       } else {
+               /*
+                * Pattern: 0..01..10..01..1
+                *
+                * Fill the unused top bits with ones, and check if
+                * the result is a valid immediate (all ones with a
+                * contiguous ranges of zeroes).
+                */
+               imm |= ~mask;
+               if (!range_of_ones(~imm))
+                       return AARCH64_BREAK_FAULT;
+
+               /*
+                * Compute the rotation to get a continuous set of
+                * ones, with the first bit set at position 0.
+                *
+                * ~imm is a 64-bit quantity: for a 64-bit element the
+                * run of zeroes may reach into the upper 32 bits, so
+                * the 64-bit flavour of fls() must be used. Plain fls()
+                * would only look at the low 32 bits and yield a bogus
+                * rotation (e.g. for 0x8000000000000001).
+                */
+               ror = fls64(~imm);
+       }
+
+       /*
+        * immr is the number of bits we need to rotate back to the
+        * original set of ones. Note that this is relative to the
+        * element size...
+        */
+       immr = (esz - ror) % esz;
+
+       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, insn, n);
+       insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_R, insn, immr);
+       return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, insn, imms);
+}
+
+/*
+ * Build a logical (immediate) instruction (AND/ORR/EOR/ANDS):
+ * @Rd = @Rn op @imm.
+ *
+ * @imm is turned into the N:immr:imms fields by
+ * aarch64_encode_immediate(), which also handles @variant.
+ *
+ * Returns AARCH64_BREAK_FAULT (after logging) for an unknown @type;
+ * otherwise whatever aarch64_encode_immediate() returns.
+ */
+u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
+                                      enum aarch64_insn_variant variant,
+                                      enum aarch64_insn_register Rn,
+                                      enum aarch64_insn_register Rd,
+                                      u64 imm)
+{
+       u32 opc;
+
+       if (type == AARCH64_INSN_LOGIC_AND) {
+               opc = aarch64_insn_get_and_imm_value();
+       } else if (type == AARCH64_INSN_LOGIC_ORR) {
+               opc = aarch64_insn_get_orr_imm_value();
+       } else if (type == AARCH64_INSN_LOGIC_EOR) {
+               opc = aarch64_insn_get_eor_imm_value();
+       } else if (type == AARCH64_INSN_LOGIC_AND_SETFLAGS) {
+               opc = aarch64_insn_get_ands_imm_value();
+       } else {
+               pr_err("%s: unknown logical encoding %d\n", __func__, type);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, opc, Rd);
+       opc = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, opc, Rn);
+
+       return aarch64_encode_immediate(imm, variant, opc);
+}
+
+/*
+ * Build an EXTR instruction with sources @Rn and @Rm, destination @Rd and
+ * extraction position @lsb (encoded in the imms field).
+ *
+ * @lsb must be at most 31 for the 32-bit variant and at most 63 for the
+ * 64-bit variant; the 64-bit variant also sets the SF bit and N = 1.
+ *
+ * Returns AARCH64_BREAK_FAULT for an out-of-range @lsb (not logged) or an
+ * unknown @variant (logged).
+ */
+u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant,
+                         enum aarch64_insn_register Rm,
+                         enum aarch64_insn_register Rn,
+                         enum aarch64_insn_register Rd,
+                         u8 lsb)
+{
+       u32 extr = aarch64_insn_get_extr_value();
+
+       if (variant == AARCH64_INSN_VARIANT_32BIT) {
+               if (lsb > 31)
+                       return AARCH64_BREAK_FAULT;
+       } else if (variant == AARCH64_INSN_VARIANT_64BIT) {
+               if (lsb > 63)
+                       return AARCH64_BREAK_FAULT;
+               extr |= AARCH64_INSN_SF_BIT;
+               extr = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_N, extr, 1);
+       } else {
+               pr_err("%s: unknown variant encoding %d\n", __func__, variant);
+               return AARCH64_BREAK_FAULT;
+       }
+
+       extr = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_S, extr, lsb);
+       extr = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, extr, Rd);
+       extr = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, extr, Rn);
+
+       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, extr, Rm);
+}
diff --git a/arch/arm64/lib/kasan_sw_tags.S b/arch/arm64/lib/kasan_sw_tags.S

new file mode 100644 (file)

index 0000000..5b04464
--- /dev/null
+++ b/arch/arm64/lib/kasan_sw_tags.S
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Google LLC
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Report a tag mismatch detected by tag-based KASAN.
+ *
+ * A compiler-generated thunk calls this with a non-AAPCS calling
+ * convention. Upon entry to this function, registers are as follows:
+ *
+ * x0:         fault address (see below for restore)
+ * x1:         fault description (see below for restore)
+ * x2 to x15:  callee-saved
+ * x16 to x17: safe to clobber
+ * x18 to x30: callee-saved
+ * sp:         pre-decremented by 256 bytes (see below for restore)
+ *
+ * The caller has decremented the SP by 256 bytes, and created a
+ * structure on the stack as follows:
+ *
+ * sp + 0..15:    x0 and x1 to be restored
+ * sp + 16..231:  free for use
+ * sp + 232..247: x29 and x30 (same as in GPRs)
+ * sp + 248..255: free for use
+ *
+ * Note that this is not a struct pt_regs.
+ *
+ * To call a regular AAPCS function we must save x2 to x15 (which we can
+ * store in the gaps), and create a frame record (for which we can use
+ * x29 and x30 spilled by the caller as those match the GPRs).
+ *
+ * The caller expects x0 and x1 to be restored from the structure, and
+ * for the structure to be removed from the stack (i.e. the SP must be
+ * incremented by 256 prior to return).
+ */
+SYM_CODE_START(__hwasan_tag_mismatch)
+#ifdef BTI_C
+       BTI_C
+#endif
+       add     x29, sp, #232                   /* frame record = the x29/x30 pair the caller spilled at sp + 232 */
+       stp     x2, x3, [sp, #8 * 2]            /* park x2-x15 in the free area of the caller's structure */
+       stp     x4, x5, [sp, #8 * 4]
+       stp     x6, x7, [sp, #8 * 6]
+       stp     x8, x9, [sp, #8 * 8]
+       stp     x10, x11, [sp, #8 * 10]
+       stp     x12, x13, [sp, #8 * 12]
+       stp     x14, x15, [sp, #8 * 14]
+#ifndef CONFIG_SHADOW_CALL_STACK
+       str     x18, [sp, #8 * 18]              /* NOTE(review): presumably not needed under SCS because x18 is reserved there -- confirm */
+#endif
+       /* x0 and x1 are untouched since entry; x2 receives the entry value of x30.  */
+       mov     x2, x30
+       bl      kasan_tag_mismatch
+       /* Reload everything the caller expects to find preserved.  */
+       ldp     x0, x1, [sp]                    /* x0/x1 come from the copies the caller stored at sp + 0..15 */
+       ldp     x2, x3, [sp, #8 * 2]
+       ldp     x4, x5, [sp, #8 * 4]
+       ldp     x6, x7, [sp, #8 * 6]
+       ldp     x8, x9, [sp, #8 * 8]
+       ldp     x10, x11, [sp, #8 * 10]
+       ldp     x12, x13, [sp, #8 * 12]
+       ldp     x14, x15, [sp, #8 * 14]
+#ifndef CONFIG_SHADOW_CALL_STACK
+       ldr     x18, [sp, #8 * 18]
+#endif
+       ldp     x29, x30, [sp, #8 * 29]         /* the bl above overwrote x30; take both back from sp + 232 */
+
+       /* remove the structure from the stack */
+       add     sp, sp, #256
+       ret
+SYM_CODE_END(__hwasan_tag_mismatch)
+EXPORT_SYMBOL(__hwasan_tag_mismatch)
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S

index edf6b97..7c2276f 100644 (file)
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -1,9 +1,6 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /*
- * Based on arch/arm/lib/memchr.S
- *
- * Copyright (C) 1995-2000 Russell King
- * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2021 Arm Ltd.
   */
  
  #include <linux/linkage.h>
@@ -19,16 +16,60 @@
   * Returns:
   *     x0 - address of first occurrence of 'c' or 0
   */
+
+#define L(label) .L ## label
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+
+#define srcin          x0
+#define chrin          w1
+#define cntin          x2
+
+#define result         x0
+
+#define wordcnt                x3
+#define rep01          x4
+#define repchr         x5
+#define cur_word       x6
+#define cur_byte       w6
+#define tmp            x7
+#define tmp2           x8
+
+       .p2align 4
+       nop
  SYM_FUNC_START_WEAK_PI(memchr)
-       and     w1, w1, #0xff
-1:     subs    x2, x2, #1
-       b.mi    2f
-       ldrb    w3, [x0], #1
-       cmp     w3, w1
-       b.ne    1b
-       sub     x0, x0, #1
+       and     chrin, chrin, #0xff
+       lsr     wordcnt, cntin, #3
+       cbz     wordcnt, L(byte_loop)
+       mov     rep01, #REP8_01
+       mul     repchr, x1, rep01
+       and     cntin, cntin, #7
+L(word_loop):
+       ldr     cur_word, [srcin], #8
+       sub     wordcnt, wordcnt, #1
+       eor     cur_word, cur_word, repchr
+       sub     tmp, cur_word, rep01
+       orr     tmp2, cur_word, #REP8_7f
+       bics    tmp, tmp, tmp2
+       b.ne    L(found_word)
+       cbnz    wordcnt, L(word_loop)
+L(byte_loop):
+       cbz     cntin, L(not_found)
+       ldrb    cur_byte, [srcin], #1
+       sub     cntin, cntin, #1
+       cmp     cur_byte, chrin
+       b.ne    L(byte_loop)
+       sub     srcin, srcin, #1
+       ret
+L(found_word):
+CPU_LE(        rev     tmp, tmp)
+       clz     tmp, tmp
+       sub     tmp, tmp, #64
+       add     result, srcin, tmp, asr #3
         ret
-2:     mov     x0, #0
+L(not_found):
+       mov     result, #0
         ret
  SYM_FUNC_END_PI(memchr)
  EXPORT_SYMBOL_NOKASAN(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S

index c0671e7..7d95638 100644 (file)
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -1,247 +1,139 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013-2021, Arm Limited.
   *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
   */
  
  #include <linux/linkage.h>
  #include <asm/assembler.h>
  
-/*
-* compare memory areas(when two memory areas' offset are different,
-* alignment handled by the hardware)
-*
-* Parameters:
-*  x0 - const memory area 1 pointer
-*  x1 - const memory area 2 pointer
-*  x2 - the maximal compare byte length
-* Returns:
-*  x0 - a compare result, maybe less than, equal to, or greater than ZERO
-*/
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ */
+
+#define L(label) .L ## label
  
  /* Parameters and result.  */
-src1           .req    x0
-src2           .req    x1
-limit          .req    x2
-result         .req    x0
+#define src1           x0
+#define src2           x1
+#define limit          x2
+#define result         w0
  
  /* Internal variables.  */
-data1          .req    x3
-data1w         .req    w3
-data2          .req    x4
-data2w         .req    w4
-has_nul                .req    x5
-diff           .req    x6
-endloop                .req    x7
-tmp1           .req    x8
-tmp2           .req    x9
-tmp3           .req    x10
-pos            .req    x11
-limit_wd       .req    x12
-mask           .req    x13
+#define data1          x3
+#define data1w         w3
+#define data1h         x4
+#define data2          x5
+#define data2w         w5
+#define data2h         x6
+#define tmp1           x7
+#define tmp2           x8
  
  SYM_FUNC_START_WEAK_PI(memcmp)
-       cbz     limit, .Lret0
-       eor     tmp1, src1, src2
-       tst     tmp1, #7
-       b.ne    .Lmisaligned8
-       ands    tmp1, src1, #7
-       b.ne    .Lmutual_align
-       sub     limit_wd, limit, #1 /* limit != 0, so no underflow.  */
-       lsr     limit_wd, limit_wd, #3 /* Convert to Dwords.  */
-       /*
-       * The input source addresses are at alignment boundary.
-       * Directly compare eight bytes each time.
-       */
-.Lloop_aligned:
-       ldr     data1, [src1], #8
-       ldr     data2, [src2], #8
-.Lstart_realigned:
-       subs    limit_wd, limit_wd, #1
-       eor     diff, data1, data2      /* Non-zero if differences found.  */
-       csinv   endloop, diff, xzr, cs  /* Last Dword or differences.  */
-       cbz     endloop, .Lloop_aligned
-
-       /* Not reached the limit, must have found a diff.  */
-       tbz     limit_wd, #63, .Lnot_limit
-
-       /* Limit % 8 == 0 => the diff is in the last 8 bytes. */
-       ands    limit, limit, #7
-       b.eq    .Lnot_limit
-       /*
-       * The remained bytes less than 8. It is needed to extract valid data
-       * from last eight bytes of the intended memory range.
-       */
-       lsl     limit, limit, #3        /* bytes-> bits.  */
-       mov     mask, #~0
-CPU_BE( lsr    mask, mask, limit )
-CPU_LE( lsl    mask, mask, limit )
-       bic     data1, data1, mask
-       bic     data2, data2, mask
-
-       orr     diff, diff, mask
-       b       .Lnot_limit
-
-.Lmutual_align:
-       /*
-       * Sources are mutually aligned, but are not currently at an
-       * alignment boundary. Round down the addresses and then mask off
-       * the bytes that precede the start point.
-       */
-       bic     src1, src1, #7
-       bic     src2, src2, #7
-       ldr     data1, [src1], #8
-       ldr     data2, [src2], #8
-       /*
-       * We can not add limit with alignment offset(tmp1) here. Since the
-       * addition probably make the limit overflown.
-       */
-       sub     limit_wd, limit, #1/*limit != 0, so no underflow.*/
-       and     tmp3, limit_wd, #7
-       lsr     limit_wd, limit_wd, #3
-       add     tmp3, tmp3, tmp1
-       add     limit_wd, limit_wd, tmp3, lsr #3
-       add     limit, limit, tmp1/* Adjust the limit for the extra.  */
-
-       lsl     tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/
-       neg     tmp1, tmp1/* Bits to alignment -64.  */
-       mov     tmp2, #~0
-       /*mask off the non-intended bytes before the start address.*/
-CPU_BE( lsl    tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/
-       /* Little-endian.  Early bytes are at LSB.  */
-CPU_LE( lsr    tmp2, tmp2, tmp1 )
-
-       orr     data1, data1, tmp2
-       orr     data2, data2, tmp2
-       b       .Lstart_realigned
-
-       /*src1 and src2 have different alignment offset.*/
-.Lmisaligned8:
-       cmp     limit, #8
-       b.lo    .Ltiny8proc /*limit < 8: compare byte by byte*/
-
-       and     tmp1, src1, #7
-       neg     tmp1, tmp1
-       add     tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/
-       and     tmp2, src2, #7
-       neg     tmp2, tmp2
-       add     tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/
-       subs    tmp3, tmp1, tmp2
-       csel    pos, tmp1, tmp2, hi /*Choose the maximum.*/
-
-       sub     limit, limit, pos
-       /*compare the proceeding bytes in the first 8 byte segment.*/
-.Ltinycmp:
-       ldrb    data1w, [src1], #1
-       ldrb    data2w, [src2], #1
-       subs    pos, pos, #1
-       ccmp    data1w, data2w, #0, ne  /* NZCV = 0b0000.  */
-       b.eq    .Ltinycmp
-       cbnz    pos, 1f /*diff occurred before the last byte.*/
-       cmp     data1w, data2w
-       b.eq    .Lstart_align
-1:
-       sub     result, data1, data2
+       subs    limit, limit, 8                 /* limit = n - 8 */
+       b.lo    L(less8)                        /* fewer than 8 bytes in total */
+
+       ldr     data1, [src1], 8
+       ldr     data2, [src2], 8
+       cmp     data1, data2
+       b.ne    L(return)
+
+       subs    limit, limit, 8                 /* limit = n - 16 */
+       b.gt    L(more16)
+       /* 8..16 bytes: limit is in [-8..0], so these loads cover the final 8 bytes.  */
+       ldr     data1, [src1, limit]
+       ldr     data2, [src2, limit]
+       b       L(return)
+
+L(more16):
+       ldr     data1, [src1], 8
+       ldr     data2, [src2], 8
+       cmp     data1, data2
+       bne     L(return)
+
+       /* Jump directly to comparing the last 16 bytes for 32 byte (or less)
+          strings.  */
+       subs    limit, limit, 16                /* limit = n - 32 */
+       b.ls    L(last_bytes)
+
+       /* We overlap loads between 0-32 bytes at either side of SRC1 when we
+          try to align, so limit it only to strings larger than 128 bytes.  */
+       cmp     limit, 96                       /* n - 32 <= 96, i.e. n <= 128 */
+       b.ls    L(loop16)
+
+       /* Align src1 and adjust src2 with bytes not yet done.  */
+       and     tmp1, src1, 15                  /* distance of src1 past a 16-byte boundary */
+       add     limit, limit, tmp1              /* the stepped-back bytes are compared again */
+       sub     src1, src1, tmp1
+       sub     src2, src2, tmp1
+
+       /* Loop performing 16 bytes per iteration using aligned src1.
+          Limit is pre-decremented by 16 and must be larger than zero.
+          Exit if <= 16 bytes left to do or if the data is not equal.  */
+       .p2align 4
+L(loop16):
+       ldp     data1, data1h, [src1], 16
+       ldp     data2, data2h, [src2], 16
+       subs    limit, limit, 16
+       ccmp    data1, data2, 0, hi             /* limit still > 0: compare low halves; otherwise force "ne" */
+       ccmp    data1h, data2h, 0, eq           /* low halves equal: compare high halves; otherwise keep "ne" */
+       b.eq    L(loop16)
+       /* Out of the loop: either a pair differed or at most 16 bytes remain.  */
+       cmp     data1, data2
+       bne     L(return)
+       mov     data1, data1h
+       mov     data2, data2h
+       cmp     data1, data2
+       bne     L(return)
+
+       /* Compare last 1-16 bytes using unaligned access.  */
+L(last_bytes):
+       add     src1, src1, limit               /* limit <= 0 here: step back to 16 bytes before the end */
+       add     src2, src2, limit
+       ldp     data1, data1h, [src1]
+       ldp     data2, data2h, [src2]
+       cmp     data1, data2
+       bne     L(return)
+       mov     data1, data1h
+       mov     data2, data2h
+       cmp     data1, data2
+
+       /* Compare data bytes and set return value to 0, -1 or 1.  */
+L(return):
+#ifndef __AARCH64EB__
+       rev     data1, data1                    /* little-endian: put the first byte in memory on top so an */
+       rev     data2, data2                    /* unsigned compare orders the words bytewise */
+#endif
+       cmp     data1, data2
+L(ret_eq):
+       cset    result, ne                      /* 0 if equal, 1 if different */
+       cneg    result, result, lo              /* turn 1 into -1 when data1 is the lower one (unsigned) */
         ret
  
-.Lstart_align:
-       lsr     limit_wd, limit, #3
-       cbz     limit_wd, .Lremain8
-
-       ands    xzr, src1, #7
-       b.eq    .Lrecal_offset
-       /*process more leading bytes to make src1 aligned...*/
-       add     src1, src1, tmp3 /*backwards src1 to alignment boundary*/
-       add     src2, src2, tmp3
-       sub     limit, limit, tmp3
-       lsr     limit_wd, limit, #3
-       cbz     limit_wd, .Lremain8
-       /*load 8 bytes from aligned SRC1..*/
-       ldr     data1, [src1], #8
-       ldr     data2, [src2], #8
-
-       subs    limit_wd, limit_wd, #1
-       eor     diff, data1, data2  /*Non-zero if differences found.*/
-       csinv   endloop, diff, xzr, ne
-       cbnz    endloop, .Lunequal_proc
-       /*How far is the current SRC2 from the alignment boundary...*/
-       and     tmp3, tmp3, #7
-
-.Lrecal_offset:/*src1 is aligned now..*/
-       neg     pos, tmp3
-.Lloopcmp_proc:
-       /*
-       * Divide the eight bytes into two parts. First,backwards the src2
-       * to an alignment boundary,load eight bytes and compare from
-       * the SRC2 alignment boundary. If all 8 bytes are equal,then start
-       * the second part's comparison. Otherwise finish the comparison.
-       * This special handle can garantee all the accesses are in the
-       * thread/task space in avoid to overrange access.
-       */
-       ldr     data1, [src1,pos]
-       ldr     data2, [src2,pos]
-       eor     diff, data1, data2  /* Non-zero if differences found.  */
-       cbnz    diff, .Lnot_limit
-
-       /*The second part process*/
-       ldr     data1, [src1], #8
-       ldr     data2, [src2], #8
-       eor     diff, data1, data2  /* Non-zero if differences found.  */
-       subs    limit_wd, limit_wd, #1
-       csinv   endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
-       cbz     endloop, .Lloopcmp_proc
-.Lunequal_proc:
-       cbz     diff, .Lremain8
-
-/* There is difference occurred in the latest comparison. */
-.Lnot_limit:
-/*
-* For little endian,reverse the low significant equal bits into MSB,then
-* following CLZ can find how many equal bits exist.
-*/
-CPU_LE( rev    diff, diff )
-CPU_LE( rev    data1, data1 )
-CPU_LE( rev    data2, data2 )
-
-       /*
-       * The MS-non-zero bit of DIFF marks either the first bit
-       * that is different, or the end of the significant data.
-       * Shifting left now will bring the critical information into the
-       * top bits.
-       */
-       clz     pos, diff
-       lsl     data1, data1, pos
-       lsl     data2, data2, pos
-       /*
-       * We need to zero-extend (char is unsigned) the value and then
-       * perform a signed subtraction.
-       */
-       lsr     data1, data1, #56
-       sub     result, data1, data2, lsr #56
+       .p2align 4
+       /* Compare up to 8 bytes.  Limit is [-8..-1].  */
+L(less8):
+       adds    limit, limit, 4                 /* limit = n - 4; carry is set only when n >= 4 */
+       b.lo    L(less4)
+       ldr     data1w, [src1], 4
+       ldr     data2w, [src2], 4
+       cmp     data1w, data2w
+       b.ne    L(return)
+       sub     limit, limit, 4                 /* pre-bias so the adds below yields the bytes still left */
+L(less4):
+       adds    limit, limit, 4                 /* limit = bytes still to compare (0..3) */
+       beq     L(ret_eq)                       /* none left: Z is set, so ret_eq produces 0 */
+L(byte_loop):
+       ldrb    data1w, [src1], 1
+       ldrb    data2w, [src2], 1
+       subs    limit, limit, 1
+       ccmp    data1w, data2w, 0, ne   /* NZCV = 0b0000.  */
+       b.eq    L(byte_loop)
+       sub     result, data1w, data2w          /* difference of the last byte pair that was loaded */
         ret
  
-.Lremain8:
-       /* Limit % 8 == 0 =>. all data are equal.*/
-       ands    limit, limit, #7
-       b.eq    .Lret0
-
-.Ltiny8proc:
-       ldrb    data1w, [src1], #1
-       ldrb    data2w, [src2], #1
-       subs    limit, limit, #1
-
-       ccmp    data1w, data2w, #0, ne  /* NZCV = 0b0000. */
-       b.eq    .Ltiny8proc
-       sub     result, data1, data2
-       ret
-.Lret0:
-       mov     result, #0
-       ret
  SYM_FUNC_END_PI(memcmp)
  EXPORT_SYMBOL_NOKASAN(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S

index dc8d2a2..b82fd64 100644 (file)
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -1,66 +1,252 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2012-2021, Arm Limited.
   *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/memcpy.S
   */
  
  #include <linux/linkage.h>
  #include <asm/assembler.h>
-#include <asm/cache.h>
  
-/*
- * Copy a buffer from src to dest (alignment handled by the hardware)
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
   *
- * Parameters:
- *     x0 - dest
- *     x1 - src
- *     x2 - n
- * Returns:
- *     x0 - dest
   */
-       .macro ldrb1 reg, ptr, val
-       ldrb  \reg, [\ptr], \val
-       .endm
-
-       .macro strb1 reg, ptr, val
-       strb \reg, [\ptr], \val
-       .endm
  
-       .macro ldrh1 reg, ptr, val
-       ldrh  \reg, [\ptr], \val
-       .endm
+#define L(label) .L ## label
  
-       .macro strh1 reg, ptr, val
-       strh \reg, [\ptr], \val
-       .endm
+#define dstin  x0
+#define src    x1
+#define count  x2
+#define dst    x3
+#define srcend x4
+#define dstend x5
+#define A_l    x6
+#define A_lw   w6
+#define A_h    x7
+#define B_l    x8
+#define B_lw   w8
+#define B_h    x9
+#define C_l    x10
+#define C_lw   w10
+#define C_h    x11
+#define D_l    x12
+#define D_h    x13
+#define E_l    x14
+#define E_h    x15
+#define F_l    x16
+#define F_h    x17
+#define G_l    count
+#define G_h    dst
+#define H_l    src
+#define H_h    srcend
+#define tmp1   x14
  
-       .macro ldr1 reg, ptr, val
-       ldr \reg, [\ptr], \val
-       .endm
+/* This implementation handles overlaps and supports both memcpy and memmove
+   from a single entry point.  It uses unaligned accesses and branchless
+   sequences to keep the code small, simple and improve performance.
  
-       .macro str1 reg, ptr, val
-       str \reg, [\ptr], \val
-       .endm
+   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
+   copies of up to 128 bytes, and large copies.  The overhead of the overlap
+   check is negligible since it is only required for large copies.
  
-       .macro ldp1 reg1, reg2, ptr, val
-       ldp \reg1, \reg2, [\ptr], \val
-       .endm
-
-       .macro stp1 reg1, reg2, ptr, val
-       stp \reg1, \reg2, [\ptr], \val
-       .endm
+   Large copies use a software pipelined loop processing 64 bytes per iteration.
+   The destination pointer is 16-byte aligned to minimize unaligned accesses.
+   The loop tail is handled by always copying 64 bytes from the end.
+*/
  
+SYM_FUNC_START_ALIAS(__memmove)
+SYM_FUNC_START_WEAK_ALIAS_PI(memmove)
  SYM_FUNC_START_ALIAS(__memcpy)
  SYM_FUNC_START_WEAK_PI(memcpy)
-#include "copy_template.S"
+       add     srcend, src, count              /* one past the last source byte */
+       add     dstend, dstin, count            /* one past the last destination byte */
+       cmp     count, 128
+       b.hi    L(copy_long)
+       cmp     count, 32
+       b.hi    L(copy32_128)
+
+       /* Small copies: 0..32 bytes.  */
+       cmp     count, 16
+       b.lo    L(copy16)
+       ldp     A_l, A_h, [src]                 /* first 16 bytes */
+       ldp     D_l, D_h, [srcend, -16]         /* last 16 bytes; overlaps the first when count < 32 */
+       stp     A_l, A_h, [dstin]               /* both loads are done before either store */
+       stp     D_l, D_h, [dstend, -16]
+       ret
+
+       /* Copy 8-15 bytes.  */
+L(copy16):
+       tbz     count, 3, L(copy8)              /* count < 16 here; bit 3 clear means count < 8 */
+       ldr     A_l, [src]
+       ldr     A_h, [srcend, -8]
+       str     A_l, [dstin]
+       str     A_h, [dstend, -8]
+       ret
+
+       .p2align 3
+       /* Copy 4-7 bytes.  */
+L(copy8):
+       tbz     count, 2, L(copy4)              /* bit 2 clear as well: count < 4 */
+       ldr     A_lw, [src]
+       ldr     B_lw, [srcend, -4]
+       str     A_lw, [dstin]
+       str     B_lw, [dstend, -4]
+       ret
+
+       /* Copy 0..3 bytes using a branchless sequence.  */
+L(copy4):
+       cbz     count, L(copy0)
+       lsr     tmp1, count, 1                  /* 0 for count 1, 1 for count 2 or 3 */
+       ldrb    A_lw, [src]                     /* offsets 0, count/2 and count-1 together cover 1..3 bytes */
+       ldrb    C_lw, [srcend, -1]
+       ldrb    B_lw, [src, tmp1]
+       strb    A_lw, [dstin]
+       strb    B_lw, [dstin, tmp1]
+       strb    C_lw, [dstend, -1]
+L(copy0):
+       ret
+
+       .p2align 4
+       /* Medium copies: 33..128 bytes.  */
+L(copy32_128):
+       ldp     A_l, A_h, [src]                 /* A/B: first 32 bytes */
+       ldp     B_l, B_h, [src, 16]
+       ldp     C_l, C_h, [srcend, -32]         /* C/D: last 32 bytes */
+       ldp     D_l, D_h, [srcend, -16]
+       cmp     count, 64
+       b.hi    L(copy128)
+       stp     A_l, A_h, [dstin]
+       stp     B_l, B_h, [dstin, 16]
+       stp     C_l, C_h, [dstend, -32]
+       stp     D_l, D_h, [dstend, -16]
         ret
+
+       .p2align 4
+       /* Copy 65..128 bytes.  */
+L(copy128):
+       ldp     E_l, E_h, [src, 32]             /* E/F: source bytes 32..63 */
+       ldp     F_l, F_h, [src, 48]
+       cmp     count, 96                       /* tested before G_l (an alias of count) is loaded */
+       b.ls    L(copy96)
+       ldp     G_l, G_h, [srcend, -64]         /* G_* and H_* reuse count/dst/src/srcend; only dstin and */
+       ldp     H_l, H_h, [srcend, -48]         /* dstend are used as addresses from here on */
+       stp     G_l, G_h, [dstend, -64]
+       stp     H_l, H_h, [dstend, -48]
+L(copy96):
+       stp     A_l, A_h, [dstin]
+       stp     B_l, B_h, [dstin, 16]
+       stp     E_l, E_h, [dstin, 32]
+       stp     F_l, F_h, [dstin, 48]
+       stp     C_l, C_h, [dstend, -32]
+       stp     D_l, D_h, [dstend, -16]
+       ret
+
+       .p2align 4
+       /* Copy more than 128 bytes.  */
+L(copy_long):
+       /* Use backwards copy if there is an overlap.  */
+       sub     tmp1, dstin, src
+       cbz     tmp1, L(copy0)                  /* dst == src: nothing to do */
+       cmp     tmp1, count                     /* unsigned: (dst - src) < count means dst starts inside the source */
+       b.lo    L(copy_long_backwards)
+
+       /* Copy 16 bytes and then align dst to 16-byte alignment.  */
+
+       ldp     D_l, D_h, [src]
+       and     tmp1, dstin, 15                 /* offset of dstin within its 16-byte block */
+       bic     dst, dstin, 15                  /* dst = dstin rounded down to 16 bytes */
+       sub     src, src, tmp1                  /* move src back by the same amount to stay in step */
+       add     count, count, tmp1      /* Count is now 16 too large.  */
+       ldp     A_l, A_h, [src, 16]
+       stp     D_l, D_h, [dstin]               /* first 16 bytes go to the original, unrounded dstin */
+       ldp     B_l, B_h, [src, 32]
+       ldp     C_l, C_h, [src, 48]
+       ldp     D_l, D_h, [src, 64]!
+       subs    count, count, 128 + 16  /* Test and readjust count.  */
+       b.ls    L(copy64_from_end)
+
+L(loop64):
+       stp     A_l, A_h, [dst, 16]             /* store the 64 bytes loaded earlier ... */
+       ldp     A_l, A_h, [src, 16]             /* ... while loading the next 64 */
+       stp     B_l, B_h, [dst, 32]
+       ldp     B_l, B_h, [src, 32]
+       stp     C_l, C_h, [dst, 48]
+       ldp     C_l, C_h, [src, 48]
+       stp     D_l, D_h, [dst, 64]!
+       ldp     D_l, D_h, [src, 64]!
+       subs    count, count, 64
+       b.hi    L(loop64)
+
+       /* Write the last iteration and copy 64 bytes from the end.  */
+L(copy64_from_end):
+       ldp     E_l, E_h, [srcend, -64]
+       stp     A_l, A_h, [dst, 16]
+       ldp     A_l, A_h, [srcend, -48]
+       stp     B_l, B_h, [dst, 32]
+       ldp     B_l, B_h, [srcend, -32]
+       stp     C_l, C_h, [dst, 48]
+       ldp     C_l, C_h, [srcend, -16]
+       stp     D_l, D_h, [dst, 64]
+       stp     E_l, E_h, [dstend, -64]         /* final 64 bytes, addressed from the end of the buffer */
+       stp     A_l, A_h, [dstend, -48]
+       stp     B_l, B_h, [dstend, -32]
+       stp     C_l, C_h, [dstend, -16]
+       ret
+
+       .p2align 4
+
+       /* Large backwards copy for overlapping copies.
+          Copy 16 bytes and then align dst to 16-byte alignment.  */
+L(copy_long_backwards):
+       ldp     D_l, D_h, [srcend, -16]
+       and     tmp1, dstend, 15                /* offset of dstend within its 16-byte block */
+       sub     srcend, srcend, tmp1            /* move srcend back by the same amount to stay in step */
+       sub     count, count, tmp1
+       ldp     A_l, A_h, [srcend, -16]
+       stp     D_l, D_h, [dstend, -16]         /* last 16 bytes go to the original, unrounded dstend */
+       ldp     B_l, B_h, [srcend, -32]
+       ldp     C_l, C_h, [srcend, -48]
+       ldp     D_l, D_h, [srcend, -64]!
+       sub     dstend, dstend, tmp1            /* dstend is now rounded down to 16 bytes */
+       subs    count, count, 128
+       b.ls    L(copy64_from_start)
+
+L(loop64_backwards):
+       stp     A_l, A_h, [dstend, -16]
+       ldp     A_l, A_h, [srcend, -16]
+       stp     B_l, B_h, [dstend, -32]
+       ldp     B_l, B_h, [srcend, -32]
+       stp     C_l, C_h, [dstend, -48]
+       ldp     C_l, C_h, [srcend, -48]
+       stp     D_l, D_h, [dstend, -64]!
+       ldp     D_l, D_h, [srcend, -64]!
+       subs    count, count, 64
+       b.hi    L(loop64_backwards)
+
+       /* Write the last iteration and copy 64 bytes from the start.  */
+L(copy64_from_start):
+       ldp     G_l, G_h, [src, 48]             /* G_* reuse count and dst, neither of which is read again */
+       stp     A_l, A_h, [dstend, -16]
+       ldp     A_l, A_h, [src, 32]
+       stp     B_l, B_h, [dstend, -32]
+       ldp     B_l, B_h, [src, 16]
+       stp     C_l, C_h, [dstend, -48]
+       ldp     C_l, C_h, [src]
+       stp     D_l, D_h, [dstend, -64]
+       stp     G_l, G_h, [dstin, 48]           /* first 64 bytes, addressed from the start of the buffer */
+       stp     A_l, A_h, [dstin, 32]
+       stp     B_l, B_h, [dstin, 16]
+       stp     C_l, C_h, [dstin]
+       ret
+
  SYM_FUNC_END_PI(memcpy)
  EXPORT_SYMBOL(memcpy)
  SYM_FUNC_END_ALIAS(__memcpy)
  EXPORT_SYMBOL(__memcpy)
+SYM_FUNC_END_ALIAS_PI(memmove)
+EXPORT_SYMBOL(memmove)
+SYM_FUNC_END_ALIAS(__memmove)
+EXPORT_SYMBOL(__memmove)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S

deleted file mode 100644 (file)

index 1035dce..0000000
--- a/arch/arm64/lib/memmove.S
+++ /dev/null
@@ -1,189 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
- *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/cache.h>
-
-/*
- * Move a buffer from src to test (alignment handled by the hardware).
- * If dest <= src, call memcpy, otherwise copy in reverse order.
- *
- * Parameters:
- *     x0 - dest
- *     x1 - src
- *     x2 - n
- * Returns:
- *     x0 - dest
- */
-dstin  .req    x0
-src    .req    x1
-count  .req    x2
-tmp1   .req    x3
-tmp1w  .req    w3
-tmp2   .req    x4
-tmp2w  .req    w4
-tmp3   .req    x5
-tmp3w  .req    w5
-dst    .req    x6
-
-A_l    .req    x7
-A_h    .req    x8
-B_l    .req    x9
-B_h    .req    x10
-C_l    .req    x11
-C_h    .req    x12
-D_l    .req    x13
-D_h    .req    x14
-
-SYM_FUNC_START_ALIAS(__memmove)
-SYM_FUNC_START_WEAK_PI(memmove)
-       cmp     dstin, src
-       b.lo    __memcpy
-       add     tmp1, src, count
-       cmp     dstin, tmp1
-       b.hs    __memcpy                /* No overlap.  */
-
-       add     dst, dstin, count
-       add     src, src, count
-       cmp     count, #16
-       b.lo    .Ltail15  /*probably non-alignment accesses.*/
-
-       ands    tmp2, src, #15     /* Bytes to reach alignment.  */
-       b.eq    .LSrcAligned
-       sub     count, count, tmp2
-       /*
-       * process the aligned offset length to make the src aligned firstly.
-       * those extra instructions' cost is acceptable. It also make the
-       * coming accesses are based on aligned address.
-       */
-       tbz     tmp2, #0, 1f
-       ldrb    tmp1w, [src, #-1]!
-       strb    tmp1w, [dst, #-1]!
-1:
-       tbz     tmp2, #1, 2f
-       ldrh    tmp1w, [src, #-2]!
-       strh    tmp1w, [dst, #-2]!
-2:
-       tbz     tmp2, #2, 3f
-       ldr     tmp1w, [src, #-4]!
-       str     tmp1w, [dst, #-4]!
-3:
-       tbz     tmp2, #3, .LSrcAligned
-       ldr     tmp1, [src, #-8]!
-       str     tmp1, [dst, #-8]!
-
-.LSrcAligned:
-       cmp     count, #64
-       b.ge    .Lcpy_over64
-
-       /*
-       * Deal with small copies quickly by dropping straight into the
-       * exit block.
-       */
-.Ltail63:
-       /*
-       * Copy up to 48 bytes of data. At this point we only need the
-       * bottom 6 bits of count to be accurate.
-       */
-       ands    tmp1, count, #0x30
-       b.eq    .Ltail15
-       cmp     tmp1w, #0x20
-       b.eq    1f
-       b.lt    2f
-       ldp     A_l, A_h, [src, #-16]!
-       stp     A_l, A_h, [dst, #-16]!
-1:
-       ldp     A_l, A_h, [src, #-16]!
-       stp     A_l, A_h, [dst, #-16]!
-2:
-       ldp     A_l, A_h, [src, #-16]!
-       stp     A_l, A_h, [dst, #-16]!
-
-.Ltail15:
-       tbz     count, #3, 1f
-       ldr     tmp1, [src, #-8]!
-       str     tmp1, [dst, #-8]!
-1:
-       tbz     count, #2, 2f
-       ldr     tmp1w, [src, #-4]!
-       str     tmp1w, [dst, #-4]!
-2:
-       tbz     count, #1, 3f
-       ldrh    tmp1w, [src, #-2]!
-       strh    tmp1w, [dst, #-2]!
-3:
-       tbz     count, #0, .Lexitfunc
-       ldrb    tmp1w, [src, #-1]
-       strb    tmp1w, [dst, #-1]
-
-.Lexitfunc:
-       ret
-
-.Lcpy_over64:
-       subs    count, count, #128
-       b.ge    .Lcpy_body_large
-       /*
-       * Less than 128 bytes to copy, so handle 64 bytes here and then jump
-       * to the tail.
-       */
-       ldp     A_l, A_h, [src, #-16]
-       stp     A_l, A_h, [dst, #-16]
-       ldp     B_l, B_h, [src, #-32]
-       ldp     C_l, C_h, [src, #-48]
-       stp     B_l, B_h, [dst, #-32]
-       stp     C_l, C_h, [dst, #-48]
-       ldp     D_l, D_h, [src, #-64]!
-       stp     D_l, D_h, [dst, #-64]!
-
-       tst     count, #0x3f
-       b.ne    .Ltail63
-       ret
-
-       /*
-       * Critical loop. Start at a new cache line boundary. Assuming
-       * 64 bytes per line this ensures the entire loop is in one line.
-       */
-       .p2align        L1_CACHE_SHIFT
-.Lcpy_body_large:
-       /* pre-load 64 bytes data. */
-       ldp     A_l, A_h, [src, #-16]
-       ldp     B_l, B_h, [src, #-32]
-       ldp     C_l, C_h, [src, #-48]
-       ldp     D_l, D_h, [src, #-64]!
-1:
-       /*
-       * interlace the load of next 64 bytes data block with store of the last
-       * loaded 64 bytes data.
-       */
-       stp     A_l, A_h, [dst, #-16]
-       ldp     A_l, A_h, [src, #-16]
-       stp     B_l, B_h, [dst, #-32]
-       ldp     B_l, B_h, [src, #-32]
-       stp     C_l, C_h, [dst, #-48]
-       ldp     C_l, C_h, [src, #-48]
-       stp     D_l, D_h, [dst, #-64]!
-       ldp     D_l, D_h, [src, #-64]!
-       subs    count, count, #64
-       b.ge    1b
-       stp     A_l, A_h, [dst, #-16]
-       stp     B_l, B_h, [dst, #-32]
-       stp     C_l, C_h, [dst, #-48]
-       stp     D_l, D_h, [dst, #-64]!
-
-       tst     count, #0x3f
-       b.ne    .Ltail63
-       ret
-SYM_FUNC_END_PI(memmove)
-EXPORT_SYMBOL(memmove)
-SYM_FUNC_END_ALIAS(__memmove)
-EXPORT_SYMBOL(__memmove)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S

index 4e79566..d7bee21 100644 (file)
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -1,84 +1,123 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2012-2021, Arm Limited.
   *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S
   */
  
  #include <linux/linkage.h>
  #include <asm/assembler.h>
  
-/*
- * compare two strings
+/* Assumptions:
   *
- * Parameters:
- *     x0 - const string 1 pointer
- *    x1 - const string 2 pointer
- * Returns:
- * x0 - an integer less than, equal to, or greater than zero
- * if  s1  is  found, respectively, to be less than, to match,
- * or be greater than s2.
+ * ARMv8-a, AArch64
   */
  
+#define L(label) .L ## label
+
  #define REP8_01 0x0101010101010101
  #define REP8_7f 0x7f7f7f7f7f7f7f7f
  #define REP8_80 0x8080808080808080
  
  /* Parameters and result.  */
-src1           .req    x0
-src2           .req    x1
-result         .req    x0
+#define src1           x0
+#define src2           x1
+#define result         x0
  
  /* Internal variables.  */
-data1          .req    x2
-data1w         .req    w2
-data2          .req    x3
-data2w         .req    w3
-has_nul                .req    x4
-diff           .req    x5
-syndrome       .req    x6
-tmp1           .req    x7
-tmp2           .req    x8
-tmp3           .req    x9
-zeroones       .req    x10
-pos            .req    x11
-
+#define data1          x2
+#define data1w         w2
+#define data2          x3
+#define data2w         w3
+#define has_nul                x4
+#define diff           x5
+#define syndrome       x6
+#define tmp1           x7
+#define tmp2           x8
+#define tmp3           x9
+#define zeroones       x10
+#define pos            x11
+
+       /* Start of performance-critical section  -- one 64B cache line.  */
+       .align 6
  SYM_FUNC_START_WEAK_PI(strcmp)
         eor     tmp1, src1, src2
         mov     zeroones, #REP8_01
         tst     tmp1, #7
-       b.ne    .Lmisaligned8
+       b.ne    L(misaligned8)
         ands    tmp1, src1, #7
-       b.ne    .Lmutual_align
-
-       /*
-       * NUL detection works on the principle that (X - 1) & (~X) & 0x80
-       * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-       * can be done in parallel across the entire word.
-       */
-.Lloop_aligned:
+       b.ne    L(mutual_align)
+       /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+          (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+          can be done in parallel across the entire word.  */
+L(loop_aligned):
         ldr     data1, [src1], #8
         ldr     data2, [src2], #8
-.Lstart_realigned:
+L(start_realigned):
         sub     tmp1, data1, zeroones
         orr     tmp2, data1, #REP8_7f
         eor     diff, data1, data2      /* Non-zero if differences found.  */
         bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
         orr     syndrome, diff, has_nul
-       cbz     syndrome, .Lloop_aligned
-       b       .Lcal_cmpresult
+       cbz     syndrome, L(loop_aligned)
+       /* End of performance-critical section  -- one 64B cache line.  */
+
+L(end):
+#ifndef        __AARCH64EB__
+       rev     syndrome, syndrome
+       rev     data1, data1
+       /* The MS-non-zero bit of the syndrome marks either the first bit
+          that is different, or the top bit of the first zero byte.
+          Shifting left now will bring the critical information into the
+          top bits.  */
+       clz     pos, syndrome
+       rev     data2, data2
+       lsl     data1, data1, pos
+       lsl     data2, data2, pos
+       /* But we need to zero-extend (char is unsigned) the value and then
+          perform a signed 32-bit subtraction.  */
+       lsr     data1, data1, #56
+       sub     result, data1, data2, lsr #56
+       ret
+#else
+       /* For big-endian we cannot use the trick with the syndrome value
+          as carry-propagation can corrupt the upper bits if the trailing
+          bytes in the string contain 0x01.  */
+       /* However, if there is no NUL byte in the dword, we can generate
+          the result directly.  We can't just subtract the bytes as the
+          MSB might be significant.  */
+       cbnz    has_nul, 1f
+       cmp     data1, data2
+       cset    result, ne
+       cneg    result, result, lo
+       ret
+1:
+       /* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+       rev     tmp3, data1
+       sub     tmp1, tmp3, zeroones
+       orr     tmp2, tmp3, #REP8_7f
+       bic     has_nul, tmp1, tmp2
+       rev     has_nul, has_nul
+       orr     syndrome, diff, has_nul
+       clz     pos, syndrome
+       /* The MS-non-zero bit of the syndrome marks either the first bit
+          that is different, or the top bit of the first zero byte.
+          Shifting left now will bring the critical information into the
+          top bits.  */
+       lsl     data1, data1, pos
+       lsl     data2, data2, pos
+       /* But we need to zero-extend (char is unsigned) the value and then
+          perform a signed 32-bit subtraction.  */
+       lsr     data1, data1, #56
+       sub     result, data1, data2, lsr #56
+       ret
+#endif
  
-.Lmutual_align:
-       /*
-       * Sources are mutually aligned, but are not currently at an
-       * alignment boundary.  Round down the addresses and then mask off
-       * the bytes that preceed the start point.
-       */
+L(mutual_align):
+       /* Sources are mutually aligned, but are not currently at an
+          alignment boundary.  Round down the addresses and then mask off
+          the bytes that precede the start point.  */
         bic     src1, src1, #7
         bic     src2, src2, #7
         lsl     tmp1, tmp1, #3          /* Bytes beyond alignment -> bits.  */
@@ -86,138 +125,52 @@ SYM_FUNC_START_WEAK_PI(strcmp)
         neg     tmp1, tmp1              /* Bits to alignment -64.  */
         ldr     data2, [src2], #8
         mov     tmp2, #~0
+#ifdef __AARCH64EB__
         /* Big-endian.  Early bytes are at MSB.  */
-CPU_BE( lsl    tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
+       lsl     tmp2, tmp2, tmp1        /* Shift (tmp1 & 63).  */
+#else
         /* Little-endian.  Early bytes are at LSB.  */
-CPU_LE( lsr    tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
-
+       lsr     tmp2, tmp2, tmp1        /* Shift (tmp1 & 63).  */
+#endif
         orr     data1, data1, tmp2
         orr     data2, data2, tmp2
-       b       .Lstart_realigned
-
-.Lmisaligned8:
-       /*
-       * Get the align offset length to compare per byte first.
-       * After this process, one string's address will be aligned.
-       */
-       and     tmp1, src1, #7
-       neg     tmp1, tmp1
-       add     tmp1, tmp1, #8
-       and     tmp2, src2, #7
-       neg     tmp2, tmp2
-       add     tmp2, tmp2, #8
-       subs    tmp3, tmp1, tmp2
-       csel    pos, tmp1, tmp2, hi /*Choose the maximum. */
-.Ltinycmp:
+       b       L(start_realigned)
+
+L(misaligned8):
+       /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
+          checking to make sure that we don't access beyond page boundary in
+          SRC2.  */
+       tst     src1, #7
+       b.eq    L(loop_misaligned)
+L(do_misaligned):
         ldrb    data1w, [src1], #1
         ldrb    data2w, [src2], #1
-       subs    pos, pos, #1
-       ccmp    data1w, #1, #0, ne  /* NZCV = 0b0000.  */
-       ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
-       b.eq    .Ltinycmp
-       cbnz    pos, 1f /*find the null or unequal...*/
         cmp     data1w, #1
-       ccmp    data1w, data2w, #0, cs
-       b.eq    .Lstart_align /*the last bytes are equal....*/
-1:
-       sub     result, data1, data2
-       ret
-
-.Lstart_align:
-       ands    xzr, src1, #7
-       b.eq    .Lrecal_offset
-       /*process more leading bytes to make str1 aligned...*/
-       add     src1, src1, tmp3
-       add     src2, src2, tmp3
-       /*load 8 bytes from aligned str1 and non-aligned str2..*/
+       ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
+       b.ne    L(done)
+       tst     src1, #7
+       b.ne    L(do_misaligned)
+
+L(loop_misaligned):
+       /* Test if we are within the last dword of the end of a 4K page.  If
+          yes then jump back to the misaligned loop to compare a byte at a time.  */
+       and     tmp1, src2, #0xff8
+       eor     tmp1, tmp1, #0xff8
+       cbz     tmp1, L(do_misaligned)
         ldr     data1, [src1], #8
         ldr     data2, [src2], #8
  
         sub     tmp1, data1, zeroones
         orr     tmp2, data1, #REP8_7f
-       bic     has_nul, tmp1, tmp2
-       eor     diff, data1, data2 /* Non-zero if differences found.  */
-       orr     syndrome, diff, has_nul
-       cbnz    syndrome, .Lcal_cmpresult
-       /*How far is the current str2 from the alignment boundary...*/
-       and     tmp3, tmp3, #7
-.Lrecal_offset:
-       neg     pos, tmp3
-.Lloopcmp_proc:
-       /*
-       * Divide the eight bytes into two parts. First,backwards the src2
-       * to an alignment boundary,load eight bytes from the SRC2 alignment
-       * boundary,then compare with the relative bytes from SRC1.
-       * If all 8 bytes are equal,then start the second part's comparison.
-       * Otherwise finish the comparison.
-       * This special handle can garantee all the accesses are in the
-       * thread/task space in avoid to overrange access.
-       */
-       ldr     data1, [src1,pos]
-       ldr     data2, [src2,pos]
-       sub     tmp1, data1, zeroones
-       orr     tmp2, data1, #REP8_7f
-       bic     has_nul, tmp1, tmp2
-       eor     diff, data1, data2  /* Non-zero if differences found.  */
-       orr     syndrome, diff, has_nul
-       cbnz    syndrome, .Lcal_cmpresult
-
-       /*The second part process*/
-       ldr     data1, [src1], #8
-       ldr     data2, [src2], #8
-       sub     tmp1, data1, zeroones
-       orr     tmp2, data1, #REP8_7f
-       bic     has_nul, tmp1, tmp2
-       eor     diff, data1, data2  /* Non-zero if differences found.  */
+       eor     diff, data1, data2      /* Non-zero if differences found.  */
+       bic     has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
         orr     syndrome, diff, has_nul
-       cbz     syndrome, .Lloopcmp_proc
+       cbz     syndrome, L(loop_misaligned)
+       b       L(end)
  
-.Lcal_cmpresult:
-       /*
-       * reversed the byte-order as big-endian,then CLZ can find the most
-       * significant zero bits.
-       */
-CPU_LE( rev    syndrome, syndrome )
-CPU_LE( rev    data1, data1 )
-CPU_LE( rev    data2, data2 )
-
-       /*
-       * For big-endian we cannot use the trick with the syndrome value
-       * as carry-propagation can corrupt the upper bits if the trailing
-       * bytes in the string contain 0x01.
-       * However, if there is no NUL byte in the dword, we can generate
-       * the result directly.  We cannot just subtract the bytes as the
-       * MSB might be significant.
-       */
-CPU_BE( cbnz   has_nul, 1f )
-CPU_BE( cmp    data1, data2 )
-CPU_BE( cset   result, ne )
-CPU_BE( cneg   result, result, lo )
-CPU_BE( ret )
-CPU_BE( 1: )
-       /*Re-compute the NUL-byte detection, using a byte-reversed value. */
-CPU_BE(        rev     tmp3, data1 )
-CPU_BE(        sub     tmp1, tmp3, zeroones )
-CPU_BE(        orr     tmp2, tmp3, #REP8_7f )
-CPU_BE(        bic     has_nul, tmp1, tmp2 )
-CPU_BE(        rev     has_nul, has_nul )
-CPU_BE(        orr     syndrome, diff, has_nul )
-
-       clz     pos, syndrome
-       /*
-       * The MS-non-zero bit of the syndrome marks either the first bit
-       * that is different, or the top bit of the first zero byte.
-       * Shifting left now will bring the critical information into the
-       * top bits.
-       */
-       lsl     data1, data1, pos
-       lsl     data2, data2, pos
-       /*
-       * But we need to zero-extend (char is unsigned) the value and then
-       * perform a signed 32-bit subtraction.
-       */
-       lsr     data1, data1, #56
-       sub     result, data1, data2, lsr #56
+L(done):
+       sub     result, data1, data2
         ret
+
  SYM_FUNC_END_PI(strcmp)
  EXPORT_SYMBOL_NOKASAN(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S

index ee3ed88..35fbdb7 100644 (file)
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -1,115 +1,203 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013-2021, Arm Limited.
   *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/98e4d6a5c13c8e54/string/aarch64/strlen.S
   */
  
  #include <linux/linkage.h>
  #include <asm/assembler.h>
  
-/*
- * calculate the length of a string
+/* Assumptions:
   *
- * Parameters:
- *     x0 - const string pointer
- * Returns:
- *     x0 - the return length of specific string
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
   */
  
+#define L(label) .L ## label
+
  /* Arguments and results.  */
-srcin          .req    x0
-len            .req    x0
+#define srcin          x0
+#define len            x0
  
  /* Locals and temporaries.  */
-src            .req    x1
-data1          .req    x2
-data2          .req    x3
-data2a         .req    x4
-has_nul1       .req    x5
-has_nul2       .req    x6
-tmp1           .req    x7
-tmp2           .req    x8
-tmp3           .req    x9
-tmp4           .req    x10
-zeroones       .req    x11
-pos            .req    x12
+#define src            x1
+#define data1          x2
+#define data2          x3
+#define has_nul1       x4
+#define has_nul2       x5
+#define tmp1           x4
+#define tmp2           x5
+#define tmp3           x6
+#define tmp4           x7
+#define zeroones       x8
+
+       /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+          (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+          can be done in parallel across the entire word. A faster check
+          (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
+          false hits for characters 129..255.  */
  
  #define REP8_01 0x0101010101010101
  #define REP8_7f 0x7f7f7f7f7f7f7f7f
  #define REP8_80 0x8080808080808080
  
+#define MIN_PAGE_SIZE 4096
+
+       /* Since strings are short on average, we check the first 16 bytes
+          of the string for a NUL character.  In order to do an unaligned ldp
+          safely we have to do a page cross check first.  If there is a NUL
+          byte we calculate the length from the 2 8-byte words using
+          conditional select to reduce branch mispredictions (it is unlikely
+          strlen will be repeatedly called on strings with the same length).
+
+          If the string is longer than 16 bytes, we align src so we don't need
+          further page cross checks, and process 32 bytes per iteration
+          using the fast NUL check.  If we encounter non-ASCII characters,
+          fallback to a second loop using the full NUL check.
+
+          If the page cross check fails, we read 16 bytes from an aligned
+          address, remove any characters before the string, and continue
+          in the main loop using aligned loads.  Since strings crossing a
+          page in the first 16 bytes are rare (probability of
+          16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
+
+          AArch64 systems have a minimum page size of 4k.  We don't bother
+          checking for larger page sizes - the cost of setting up the correct
+          page size is just not worth the extra gain from a small reduction in
+          the cases taking the slow path.  Note that we only care about
+          whether the first fetch, which may be misaligned, crosses a page
+          boundary.  */
+
  SYM_FUNC_START_WEAK_PI(strlen)
-       mov     zeroones, #REP8_01
-       bic     src, srcin, #15
-       ands    tmp1, srcin, #15
-       b.ne    .Lmisaligned
-       /*
-       * NUL detection works on the principle that (X - 1) & (~X) & 0x80
-       * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-       * can be done in parallel across the entire word.
-       */
-       /*
-       * The inner loop deals with two Dwords at a time. This has a
-       * slightly higher start-up cost, but we should win quite quickly,
-       * especially on cores with a high number of issue slots per
-       * cycle, as we get much better parallelism out of the operations.
-       */
-.Lloop:
-       ldp     data1, data2, [src], #16
-.Lrealigned:
+       and     tmp1, srcin, MIN_PAGE_SIZE - 1
+       mov     zeroones, REP8_01
+       cmp     tmp1, MIN_PAGE_SIZE - 16
+       b.gt    L(page_cross)
+       ldp     data1, data2, [srcin]
+#ifdef __AARCH64EB__
+       /* For big-endian, carry propagation (if the final byte in the
+          string is 0x01) means we cannot use has_nul1/2 directly.
+          Since we expect strings to be small and early-exit,
+          byte-swap the data now so has_nul1/2 will be correct.  */
+       rev     data1, data1
+       rev     data2, data2
+#endif
         sub     tmp1, data1, zeroones
-       orr     tmp2, data1, #REP8_7f
+       orr     tmp2, data1, REP8_7f
         sub     tmp3, data2, zeroones
-       orr     tmp4, data2, #REP8_7f
-       bic     has_nul1, tmp1, tmp2
-       bics    has_nul2, tmp3, tmp4
-       ccmp    has_nul1, #0, #0, eq    /* NZCV = 0000  */
-       b.eq    .Lloop
+       orr     tmp4, data2, REP8_7f
+       bics    has_nul1, tmp1, tmp2
+       bic     has_nul2, tmp3, tmp4
+       ccmp    has_nul2, 0, 0, eq
+       beq     L(main_loop_entry)
+
+       /* Enter with C = has_nul1 == 0.  */
+       csel    has_nul1, has_nul1, has_nul2, cc
+       mov     len, 8
+       rev     has_nul1, has_nul1
+       clz     tmp1, has_nul1
+       csel    len, xzr, len, cc
+       add     len, len, tmp1, lsr 3
+       ret
  
+       /* The inner loop processes 32 bytes per iteration and uses the fast
+          NUL check.  If we encounter non-ASCII characters, use a second
+          loop with the accurate NUL check.  */
+       .p2align 4
+L(main_loop_entry):
+       bic     src, srcin, 15
+       sub     src, src, 16
+L(main_loop):
+       ldp     data1, data2, [src, 32]!
+L(page_cross_entry):
+       sub     tmp1, data1, zeroones
+       sub     tmp3, data2, zeroones
+       orr     tmp2, tmp1, tmp3
+       tst     tmp2, zeroones, lsl 7
+       bne     1f
+       ldp     data1, data2, [src, 16]
+       sub     tmp1, data1, zeroones
+       sub     tmp3, data2, zeroones
+       orr     tmp2, tmp1, tmp3
+       tst     tmp2, zeroones, lsl 7
+       beq     L(main_loop)
+       add     src, src, 16
+1:
+       /* The fast check failed, so do the slower, accurate NUL check.  */
+       orr     tmp2, data1, REP8_7f
+       orr     tmp4, data2, REP8_7f
+       bics    has_nul1, tmp1, tmp2
+       bic     has_nul2, tmp3, tmp4
+       ccmp    has_nul2, 0, 0, eq
+       beq     L(nonascii_loop)
+
+       /* Enter with C = has_nul1 == 0.  */
+L(tail):
+#ifdef __AARCH64EB__
+       /* For big-endian, carry propagation (if the final byte in the
+          string is 0x01) means we cannot use has_nul1/2 directly.  The
+          easiest way to get the correct byte is to byte-swap the data
+          and calculate the syndrome a second time.  */
+       csel    data1, data1, data2, cc
+       rev     data1, data1
+       sub     tmp1, data1, zeroones
+       orr     tmp2, data1, REP8_7f
+       bic     has_nul1, tmp1, tmp2
+#else
+       csel    has_nul1, has_nul1, has_nul2, cc
+#endif
         sub     len, src, srcin
-       cbz     has_nul1, .Lnul_in_data2
-CPU_BE(        mov     data2, data1 )  /*prepare data to re-calculate the syndrome*/
-       sub     len, len, #8
-       mov     has_nul2, has_nul1
-.Lnul_in_data2:
-       /*
-       * For big-endian, carry propagation (if the final byte in the
-       * string is 0x01) means we cannot use has_nul directly.  The
-       * easiest way to get the correct byte is to byte-swap the data
-       * and calculate the syndrome a second time.
-       */
-CPU_BE( rev    data2, data2 )
-CPU_BE( sub    tmp1, data2, zeroones )
-CPU_BE( orr    tmp2, data2, #REP8_7f )
-CPU_BE( bic    has_nul2, tmp1, tmp2 )
-
-       sub     len, len, #8
-       rev     has_nul2, has_nul2
-       clz     pos, has_nul2
-       add     len, len, pos, lsr #3           /* Bits to bytes.  */
+       rev     has_nul1, has_nul1
+       add     tmp2, len, 8
+       clz     tmp1, has_nul1
+       csel    len, len, tmp2, cc
+       add     len, len, tmp1, lsr 3
         ret
  
-.Lmisaligned:
-       cmp     tmp1, #8
-       neg     tmp1, tmp1
-       ldp     data1, data2, [src], #16
-       lsl     tmp1, tmp1, #3          /* Bytes beyond alignment -> bits.  */
-       mov     tmp2, #~0
-       /* Big-endian.  Early bytes are at MSB.  */
-CPU_BE( lsl    tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
+L(nonascii_loop):
+       ldp     data1, data2, [src, 16]!
+       sub     tmp1, data1, zeroones
+       orr     tmp2, data1, REP8_7f
+       sub     tmp3, data2, zeroones
+       orr     tmp4, data2, REP8_7f
+       bics    has_nul1, tmp1, tmp2
+       bic     has_nul2, tmp3, tmp4
+       ccmp    has_nul2, 0, 0, eq
+       bne     L(tail)
+       ldp     data1, data2, [src, 16]!
+       sub     tmp1, data1, zeroones
+       orr     tmp2, data1, REP8_7f
+       sub     tmp3, data2, zeroones
+       orr     tmp4, data2, REP8_7f
+       bics    has_nul1, tmp1, tmp2
+       bic     has_nul2, tmp3, tmp4
+       ccmp    has_nul2, 0, 0, eq
+       beq     L(nonascii_loop)
+       b       L(tail)
+
+       /* Load 16 bytes from [srcin & ~15] and force the bytes that precede
+          srcin to 0x7f, so we ignore any NUL bytes before the string.
+          Then continue in the aligned loop.  */
+L(page_cross):
+       bic     src, srcin, 15
+       ldp     data1, data2, [src]
+       lsl     tmp1, srcin, 3
+       mov     tmp4, -1
+#ifdef __AARCH64EB__
+       /* Big-endian.  Early bytes are at MSB.  */
+       lsr     tmp1, tmp4, tmp1        /* Shift (tmp1 & 63).  */
+#else
         /* Little-endian.  Early bytes are at LSB.  */
-CPU_LE( lsr    tmp2, tmp2, tmp1 )      /* Shift (tmp1 & 63).  */
+       lsl     tmp1, tmp4, tmp1        /* Shift (tmp1 & 63).  */
+#endif
+       orr     tmp1, tmp1, REP8_80
+       orn     data1, data1, tmp1
+       orn     tmp2, data2, tmp1
+       tst     srcin, 8
+       csel    data1, data1, tmp4, eq
+       csel    data2, data2, tmp2, eq
+       b       L(page_cross_entry)
  
-       orr     data1, data1, tmp2
-       orr     data2a, data2, tmp2
-       csinv   data1, data1, xzr, le
-       csel    data2, data2, data2a, le
-       b       .Lrealigned
  SYM_FUNC_END_PI(strlen)
  EXPORT_SYMBOL_NOKASAN(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S

index 2a7ee94..48d44f7 100644 (file)
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,299 +1,261 @@
  /* SPDX-License-Identifier: GPL-2.0-only */
  /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013-2021, Arm Limited.
   *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S
   */
  
  #include <linux/linkage.h>
  #include <asm/assembler.h>
  
-/*
- * compare two strings
+/* Assumptions:
   *
- * Parameters:
- *  x0 - const string 1 pointer
- *  x1 - const string 2 pointer
- *  x2 - the maximal length to be compared
- * Returns:
- *  x0 - an integer less than, equal to, or greater than zero if s1 is found,
- *     respectively, to be less than, to match, or be greater than s2.
+ * ARMv8-a, AArch64
   */
  
+#define L(label) .L ## label
+
  #define REP8_01 0x0101010101010101
  #define REP8_7f 0x7f7f7f7f7f7f7f7f
  #define REP8_80 0x8080808080808080
  
  /* Parameters and result.  */
-src1           .req    x0
-src2           .req    x1
-limit          .req    x2
-result         .req    x0
+#define src1           x0
+#define src2           x1
+#define limit          x2
+#define result         x0
  
  /* Internal variables.  */
-data1          .req    x3
-data1w         .req    w3
-data2          .req    x4
-data2w         .req    w4
-has_nul                .req    x5
-diff           .req    x6
-syndrome       .req    x7
-tmp1           .req    x8
-tmp2           .req    x9
-tmp3           .req    x10
-zeroones       .req    x11
-pos            .req    x12
-limit_wd       .req    x13
-mask           .req    x14
-endloop                .req    x15
+#define data1          x3
+#define data1w         w3
+#define data2          x4
+#define data2w         w4
+#define has_nul                x5
+#define diff           x6
+#define syndrome       x7
+#define tmp1           x8
+#define tmp2           x9
+#define tmp3           x10
+#define zeroones       x11
+#define pos            x12
+#define limit_wd       x13
+#define mask           x14
+#define endloop                x15
+#define count          mask
  
  SYM_FUNC_START_WEAK_PI(strncmp)
-       cbz     limit, .Lret0
+       cbz     limit, L(ret0)
         eor     tmp1, src1, src2
         mov     zeroones, #REP8_01
         tst     tmp1, #7
-       b.ne    .Lmisaligned8
-       ands    tmp1, src1, #7
-       b.ne    .Lmutual_align
+       and     count, src1, #7
+       b.ne    L(misaligned8)
+       cbnz    count, L(mutual_align)
         /* Calculate the number of full and partial words -1.  */
-       /*
-       * when limit is mulitply of 8, if not sub 1,
-       * the judgement of last dword will wrong.
-       */
-       sub     limit_wd, limit, #1 /* limit != 0, so no underflow.  */
-       lsr     limit_wd, limit_wd, #3  /* Convert to Dwords.  */
+       sub     limit_wd, limit, #1     /* limit != 0, so no underflow.  */
+       lsr     limit_wd, limit_wd, #3  /* Convert to Dwords.  */
  
-       /*
-       * NUL detection works on the principle that (X - 1) & (~X) & 0x80
-       * (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-       * can be done in parallel across the entire word.
-       */
-.Lloop_aligned:
+       /* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+          (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+          can be done in parallel across the entire word.  */
+       .p2align 4
+L(loop_aligned):
         ldr     data1, [src1], #8
         ldr     data2, [src2], #8
-.Lstart_realigned:
+L(start_realigned):
         subs    limit_wd, limit_wd, #1
         sub     tmp1, data1, zeroones
         orr     tmp2, data1, #REP8_7f
-       eor     diff, data1, data2  /* Non-zero if differences found.  */
-       csinv   endloop, diff, xzr, pl  /* Last Dword or differences.*/
-       bics    has_nul, tmp1, tmp2 /* Non-zero if NUL terminator.  */
+       eor     diff, data1, data2      /* Non-zero if differences found.  */
+       csinv   endloop, diff, xzr, pl  /* Last Dword or differences.  */
+       bics    has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
         ccmp    endloop, #0, #0, eq
-       b.eq    .Lloop_aligned
+       b.eq    L(loop_aligned)
+       /* End of main loop */
  
-       /*Not reached the limit, must have found the end or a diff.  */
-       tbz     limit_wd, #63, .Lnot_limit
+       /* Not reached the limit, must have found the end or a diff.  */
+       tbz     limit_wd, #63, L(not_limit)
  
         /* Limit % 8 == 0 => all bytes significant.  */
         ands    limit, limit, #7
-       b.eq    .Lnot_limit
+       b.eq    L(not_limit)
  
-       lsl     limit, limit, #3    /* Bits -> bytes.  */
+       lsl     limit, limit, #3        /* Bytes -> bits.  */
         mov     mask, #~0
-CPU_BE( lsr    mask, mask, limit )
-CPU_LE( lsl    mask, mask, limit )
+#ifdef __AARCH64EB__
+       lsr     mask, mask, limit
+#else
+       lsl     mask, mask, limit
+#endif
         bic     data1, data1, mask
         bic     data2, data2, mask
  
         /* Make sure that the NUL byte is marked in the syndrome.  */
         orr     has_nul, has_nul, mask
  
-.Lnot_limit:
+L(not_limit):
         orr     syndrome, diff, has_nul
-       b       .Lcal_cmpresult
  
-.Lmutual_align:
-       /*
-       * Sources are mutually aligned, but are not currently at an
-       * alignment boundary.  Round down the addresses and then mask off
-       * the bytes that precede the start point.
-       * We also need to adjust the limit calculations, but without
-       * overflowing if the limit is near ULONG_MAX.
-       */
+#ifndef        __AARCH64EB__
+       rev     syndrome, syndrome
+       rev     data1, data1
+       /* The MS-non-zero bit of the syndrome marks either the first bit
+          that is different, or the top bit of the first zero byte.
+          Shifting left now will bring the critical information into the
+          top bits.  */
+       clz     pos, syndrome
+       rev     data2, data2
+       lsl     data1, data1, pos
+       lsl     data2, data2, pos
+       /* But we need to zero-extend (char is unsigned) the value and then
+          perform a signed 32-bit subtraction.  */
+       lsr     data1, data1, #56
+       sub     result, data1, data2, lsr #56
+       ret
+#else
+       /* For big-endian we cannot use the trick with the syndrome value
+          as carry-propagation can corrupt the upper bits if the trailing
+          bytes in the string contain 0x01.  */
+       /* However, if there is no NUL byte in the dword, we can generate
+          the result directly.  We can't just subtract the bytes as the
+          MSB might be significant.  */
+       cbnz    has_nul, 1f
+       cmp     data1, data2
+       cset    result, ne
+       cneg    result, result, lo
+       ret
+1:
+       /* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+       rev     tmp3, data1
+       sub     tmp1, tmp3, zeroones
+       orr     tmp2, tmp3, #REP8_7f
+       bic     has_nul, tmp1, tmp2
+       rev     has_nul, has_nul
+       orr     syndrome, diff, has_nul
+       clz     pos, syndrome
+       /* The MS-non-zero bit of the syndrome marks either the first bit
+          that is different, or the top bit of the first zero byte.
+          Shifting left now will bring the critical information into the
+          top bits.  */
+       lsl     data1, data1, pos
+       lsl     data2, data2, pos
+       /* But we need to zero-extend (char is unsigned) the value and then
+          perform a signed 32-bit subtraction.  */
+       lsr     data1, data1, #56
+       sub     result, data1, data2, lsr #56
+       ret
+#endif
+
+L(mutual_align):
+       /* Sources are mutually aligned, but are not currently at an
+          alignment boundary.  Round down the addresses and then mask off
+          the bytes that precede the start point.
+          We also need to adjust the limit calculations, but without
+          overflowing if the limit is near ULONG_MAX.  */
         bic     src1, src1, #7
         bic     src2, src2, #7
         ldr     data1, [src1], #8
-       neg     tmp3, tmp1, lsl #3  /* 64 - bits(bytes beyond align). */
+       neg     tmp3, count, lsl #3     /* 64 - bits(bytes beyond align). */
         ldr     data2, [src2], #8
         mov     tmp2, #~0
-       sub     limit_wd, limit, #1 /* limit != 0, so no underflow.  */
+       sub     limit_wd, limit, #1     /* limit != 0, so no underflow.  */
+#ifdef __AARCH64EB__
         /* Big-endian.  Early bytes are at MSB.  */
-CPU_BE( lsl    tmp2, tmp2, tmp3 )      /* Shift (tmp1 & 63).  */
+       lsl     tmp2, tmp2, tmp3        /* Shift (count & 63).  */
+#else
         /* Little-endian.  Early bytes are at LSB.  */
-CPU_LE( lsr    tmp2, tmp2, tmp3 )      /* Shift (tmp1 & 63).  */
-
+       lsr     tmp2, tmp2, tmp3        /* Shift (count & 63).  */
+#endif
         and     tmp3, limit_wd, #7
         lsr     limit_wd, limit_wd, #3
-       /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
-       add     limit, limit, tmp1
-       add     tmp3, tmp3, tmp1
+       /* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
+       add     limit, limit, count
+       add     tmp3, tmp3, count
         orr     data1, data1, tmp2
         orr     data2, data2, tmp2
         add     limit_wd, limit_wd, tmp3, lsr #3
-       b       .Lstart_realigned
+       b       L(start_realigned)
+
+       .p2align 4
+       /* Don't bother with dwords for up to 16 bytes.  */
+L(misaligned8):
+       cmp     limit, #16
+       b.hs    L(try_misaligned_words)
  
-/*when src1 offset is not equal to src2 offset...*/
-.Lmisaligned8:
-       cmp     limit, #8
-       b.lo    .Ltiny8proc /*limit < 8... */
-       /*
-       * Get the align offset length to compare per byte first.
-       * After this process, one string's address will be aligned.*/
-       and     tmp1, src1, #7
-       neg     tmp1, tmp1
-       add     tmp1, tmp1, #8
-       and     tmp2, src2, #7
-       neg     tmp2, tmp2
-       add     tmp2, tmp2, #8
-       subs    tmp3, tmp1, tmp2
-       csel    pos, tmp1, tmp2, hi /*Choose the maximum. */
-       /*
-       * Here, limit is not less than 8, so directly run .Ltinycmp
-       * without checking the limit.*/
-       sub     limit, limit, pos
-.Ltinycmp:
+L(byte_loop):
+       /* Perhaps we can do better than this.  */
         ldrb    data1w, [src1], #1
         ldrb    data2w, [src2], #1
-       subs    pos, pos, #1
-       ccmp    data1w, #1, #0, ne  /* NZCV = 0b0000.  */
-       ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
-       b.eq    .Ltinycmp
-       cbnz    pos, 1f /*find the null or unequal...*/
-       cmp     data1w, #1
-       ccmp    data1w, data2w, #0, cs
-       b.eq    .Lstart_align /*the last bytes are equal....*/
-1:
+       subs    limit, limit, #1
+       ccmp    data1w, #1, #0, hi      /* NZCV = 0b0000.  */
+       ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
+       b.eq    L(byte_loop)
+L(done):
         sub     result, data1, data2
         ret
-
-.Lstart_align:
+       /* Align the SRC1 to a dword by doing a bytewise compare and then do
+          the dword loop.  */
+L(try_misaligned_words):
         lsr     limit_wd, limit, #3
-       cbz     limit_wd, .Lremain8
-       /*process more leading bytes to make str1 aligned...*/
-       ands    xzr, src1, #7
-       b.eq    .Lrecal_offset
-       add     src1, src1, tmp3        /*tmp3 is positive in this branch.*/
-       add     src2, src2, tmp3
-       ldr     data1, [src1], #8
-       ldr     data2, [src2], #8
+       cbz     count, L(do_misaligned)
  
-       sub     limit, limit, tmp3
+       neg     count, count
+       and     count, count, #7
+       sub     limit, limit, count
         lsr     limit_wd, limit, #3
-       subs    limit_wd, limit_wd, #1
  
-       sub     tmp1, data1, zeroones
-       orr     tmp2, data1, #REP8_7f
-       eor     diff, data1, data2  /* Non-zero if differences found.  */
-       csinv   endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
-       bics    has_nul, tmp1, tmp2
-       ccmp    endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
-       b.ne    .Lunequal_proc
-       /*How far is the current str2 from the alignment boundary...*/
-       and     tmp3, tmp3, #7
-.Lrecal_offset:
-       neg     pos, tmp3
-.Lloopcmp_proc:
-       /*
-       * Divide the eight bytes into two parts. First,backwards the src2
-       * to an alignment boundary,load eight bytes from the SRC2 alignment
-       * boundary,then compare with the relative bytes from SRC1.
-       * If all 8 bytes are equal,then start the second part's comparison.
-       * Otherwise finish the comparison.
-       * This special handle can garantee all the accesses are in the
-       * thread/task space in avoid to overrange access.
-       */
-       ldr     data1, [src1,pos]
-       ldr     data2, [src2,pos]
-       sub     tmp1, data1, zeroones
-       orr     tmp2, data1, #REP8_7f
-       bics    has_nul, tmp1, tmp2 /* Non-zero if NUL terminator.  */
-       eor     diff, data1, data2  /* Non-zero if differences found.  */
-       csinv   endloop, diff, xzr, eq
-       cbnz    endloop, .Lunequal_proc
+L(page_end_loop):
+       ldrb    data1w, [src1], #1
+       ldrb    data2w, [src2], #1
+       cmp     data1w, #1
+       ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
+       b.ne    L(done)
+       subs    count, count, #1
+       b.hi    L(page_end_loop)
+
+L(do_misaligned):
+       /* Prepare ourselves for the next page crossing.  Unlike the aligned
+          loop, we fetch 1 less dword because we risk crossing bounds on
+          SRC2.  */
+       mov     count, #8
+       subs    limit_wd, limit_wd, #1
+       b.lo    L(done_loop)
+L(loop_misaligned):
+       and     tmp2, src2, #0xff8
+       eor     tmp2, tmp2, #0xff8
+       cbz     tmp2, L(page_end_loop)
  
-       /*The second part process*/
         ldr     data1, [src1], #8
         ldr     data2, [src2], #8
-       subs    limit_wd, limit_wd, #1
         sub     tmp1, data1, zeroones
         orr     tmp2, data1, #REP8_7f
-       eor     diff, data1, data2  /* Non-zero if differences found.  */
-       csinv   endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
-       bics    has_nul, tmp1, tmp2
-       ccmp    endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
-       b.eq    .Lloopcmp_proc
-
-.Lunequal_proc:
-       orr     syndrome, diff, has_nul
-       cbz     syndrome, .Lremain8
-.Lcal_cmpresult:
-       /*
-       * reversed the byte-order as big-endian,then CLZ can find the most
-       * significant zero bits.
-       */
-CPU_LE( rev    syndrome, syndrome )
-CPU_LE( rev    data1, data1 )
-CPU_LE( rev    data2, data2 )
-       /*
-       * For big-endian we cannot use the trick with the syndrome value
-       * as carry-propagation can corrupt the upper bits if the trailing
-       * bytes in the string contain 0x01.
-       * However, if there is no NUL byte in the dword, we can generate
-       * the result directly.  We can't just subtract the bytes as the
-       * MSB might be significant.
-       */
-CPU_BE( cbnz   has_nul, 1f )
-CPU_BE( cmp    data1, data2 )
-CPU_BE( cset   result, ne )
-CPU_BE( cneg   result, result, lo )
-CPU_BE( ret )
-CPU_BE( 1: )
-       /* Re-compute the NUL-byte detection, using a byte-reversed value.*/
-CPU_BE( rev    tmp3, data1 )
-CPU_BE( sub    tmp1, tmp3, zeroones )
-CPU_BE( orr    tmp2, tmp3, #REP8_7f )
-CPU_BE( bic    has_nul, tmp1, tmp2 )
-CPU_BE( rev    has_nul, has_nul )
-CPU_BE( orr    syndrome, diff, has_nul )
-       /*
-       * The MS-non-zero bit of the syndrome marks either the first bit
-       * that is different, or the top bit of the first zero byte.
-       * Shifting left now will bring the critical information into the
-       * top bits.
-       */
-       clz     pos, syndrome
-       lsl     data1, data1, pos
-       lsl     data2, data2, pos
-       /*
-       * But we need to zero-extend (char is unsigned) the value and then
-       * perform a signed 32-bit subtraction.
-       */
-       lsr     data1, data1, #56
-       sub     result, data1, data2, lsr #56
-       ret
-
-.Lremain8:
-       /* Limit % 8 == 0 => all bytes significant.  */
-       ands    limit, limit, #7
-       b.eq    .Lret0
-.Ltiny8proc:
-       ldrb    data1w, [src1], #1
-       ldrb    data2w, [src2], #1
-       subs    limit, limit, #1
+       eor     diff, data1, data2      /* Non-zero if differences found.  */
+       bics    has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
+       ccmp    diff, #0, #0, eq
+       b.ne    L(not_limit)
+       subs    limit_wd, limit_wd, #1
+       b.pl    L(loop_misaligned)
  
-       ccmp    data1w, #1, #0, ne  /* NZCV = 0b0000.  */
-       ccmp    data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
-       b.eq    .Ltiny8proc
-       sub     result, data1, data2
-       ret
+L(done_loop):
+       /* We found a difference or a NUL before the limit was reached.  */
+       and     limit, limit, #7
+       cbz     limit, L(not_limit)
+       /* Read the last word.  */
+       sub     src1, src1, 8
+       sub     src2, src2, 8
+       ldr     data1, [src1, limit]
+       ldr     data2, [src2, limit]
+       sub     tmp1, data1, zeroones
+       orr     tmp2, data1, #REP8_7f
+       eor     diff, data1, data2      /* Non-zero if differences found.  */
+       bics    has_nul, tmp1, tmp2     /* Non-zero if NUL terminator.  */
+       ccmp    diff, #0, #0, eq
+       b.ne    L(not_limit)
  
-.Lret0:
+L(ret0):
         mov     result, #0
         ret
+
  SYM_FUNC_END_PI(strncmp)
  EXPORT_SYMBOL_NOKASAN(strncmp)
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c

index c83bb5a..baee229 100644 (file)
--- a/arch/arm64/lib/uaccess_flushcache.c
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt)
          * barrier to order the cache maintenance against the memcpy.
          */
         memcpy(dst, src, cnt);
-       __clean_dcache_area_pop(dst, cnt);
+       dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt);
  }
  EXPORT_SYMBOL_GPL(memcpy_flushcache);
  
@@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
         rc = raw_copy_from_user(to, from, n);
  
         /* See above */
-       __clean_dcache_area_pop(to, n - rc);
+       dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc);
         return rc;
  }
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S

index 2d881f3..5051b3c 100644 (file)
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -15,7 +15,7 @@
  #include <asm/asm-uaccess.h>
  
  /*
- *     flush_icache_range(start,end)
+ *     caches_clean_inval_pou_macro(start,end) [fixup]
   *
   *     Ensure that the I and D caches are coherent within specified region.
   *     This is typically used when code has been written to a memory region,
@@ -23,12 +23,27 @@
   *
   *     - start   - virtual start address of region
   *     - end     - virtual end address of region
+ *     - fixup   - optional label to branch to on user fault
   */
-SYM_FUNC_START(__flush_icache_range)
-       /* FALLTHROUGH */
+.macro caches_clean_inval_pou_macro, fixup
+alternative_if ARM64_HAS_CACHE_IDC
+       dsb     ishst
+       b       .Ldc_skip_\@
+alternative_else_nop_endif
+       mov     x2, x0
+       mov     x3, x1
+       dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup
+.Ldc_skip_\@:
+alternative_if ARM64_HAS_CACHE_DIC
+       isb
+       b       .Lic_skip_\@
+alternative_else_nop_endif
+       invalidate_icache_by_line x0, x1, x2, x3, \fixup
+.Lic_skip_\@:
+.endm
  
  /*
- *     __flush_cache_user_range(start,end)
+ *     caches_clean_inval_pou(start,end)
   *
   *     Ensure that the I and D caches are coherent within specified region.
   *     This is typically used when code has been written to a memory region,
@@ -37,117 +52,103 @@ SYM_FUNC_START(__flush_icache_range)
   *     - start   - virtual start address of region
   *     - end     - virtual end address of region
   */
-SYM_FUNC_START(__flush_cache_user_range)
+SYM_FUNC_START(caches_clean_inval_pou)
+       caches_clean_inval_pou_macro
+       ret
+SYM_FUNC_END(caches_clean_inval_pou)
+
+/*
+ *     caches_clean_inval_user_pou(start,end)
+ *
+ *     Ensure that the I and D caches are coherent within specified region.
+ *     This is typically used when code has been written to a memory region,
+ *     and will be executed.
+ *
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
+ */
+SYM_FUNC_START(caches_clean_inval_user_pou)
         uaccess_ttbr0_enable x2, x3, x4
-alternative_if ARM64_HAS_CACHE_IDC
-       dsb     ishst
-       b       7f
-alternative_else_nop_endif
-       dcache_line_size x2, x3
-       sub     x3, x2, #1
-       bic     x4, x0, x3
-1:
-user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
-       add     x4, x4, x2
-       cmp     x4, x1
-       b.lo    1b
-       dsb     ish
  
-7:
-alternative_if ARM64_HAS_CACHE_DIC
-       isb
-       b       8f
-alternative_else_nop_endif
-       invalidate_icache_by_line x0, x1, x2, x3, 9f
-8:     mov     x0, #0
+       caches_clean_inval_pou_macro 2f
+       mov     x0, xzr
  1:
         uaccess_ttbr0_disable x1, x2
         ret
-9:
+2:
         mov     x0, #-EFAULT
         b       1b
-SYM_FUNC_END(__flush_icache_range)
-SYM_FUNC_END(__flush_cache_user_range)
+SYM_FUNC_END(caches_clean_inval_user_pou)
  
  /*
- *     invalidate_icache_range(start,end)
+ *     icache_inval_pou(start,end)
   *
   *     Ensure that the I cache is invalid within specified region.
   *
   *     - start   - virtual start address of region
   *     - end     - virtual end address of region
   */
-SYM_FUNC_START(invalidate_icache_range)
+SYM_FUNC_START(icache_inval_pou)
  alternative_if ARM64_HAS_CACHE_DIC
-       mov     x0, xzr
         isb
         ret
  alternative_else_nop_endif
  
-       uaccess_ttbr0_enable x2, x3, x4
-
-       invalidate_icache_by_line x0, x1, x2, x3, 2f
-       mov     x0, xzr
-1:
-       uaccess_ttbr0_disable x1, x2
+       invalidate_icache_by_line x0, x1, x2, x3
         ret
-2:
-       mov     x0, #-EFAULT
-       b       1b
-SYM_FUNC_END(invalidate_icache_range)
+SYM_FUNC_END(icache_inval_pou)
  
  /*
- *     __flush_dcache_area(kaddr, size)
+ *     dcache_clean_inval_poc(start, end)
   *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
   *     are cleaned and invalidated to the PoC.
   *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
   */
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
         dcache_by_line_op civac, sy, x0, x1, x2, x3
         ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
  
  /*
- *     __clean_dcache_area_pou(kaddr, size)
+ *     dcache_clean_pou(start, end)
   *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
   *     are cleaned to the PoU.
   *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
   */
-SYM_FUNC_START(__clean_dcache_area_pou)
+SYM_FUNC_START(dcache_clean_pou)
  alternative_if ARM64_HAS_CACHE_IDC
         dsb     ishst
         ret
  alternative_else_nop_endif
         dcache_by_line_op cvau, ish, x0, x1, x2, x3
         ret
-SYM_FUNC_END(__clean_dcache_area_pou)
+SYM_FUNC_END(dcache_clean_pou)
  
  /*
- *     __inval_dcache_area(kaddr, size)
+ *     dcache_inval_poc(start, end)
   *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
   *     are invalidated. Any partial lines at the ends of the interval are
   *     also cleaned to PoC to prevent data loss.
   *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - kernel start address of region
+ *     - end     - kernel end address of region
   */
  SYM_FUNC_START_LOCAL(__dma_inv_area)
-SYM_FUNC_START_PI(__inval_dcache_area)
+SYM_FUNC_START_PI(dcache_inval_poc)
         /* FALLTHROUGH */
  
  /*
- *     __dma_inv_area(start, size)
+ *     __dma_inv_area(start, end)
   *     - start   - virtual start address of region
- *     - size    - size in question
+ *     - end     - virtual end address of region
   */
-       add     x1, x1, x0
         dcache_line_size x2, x3
         sub     x3, x2, #1
         tst     x1, x3                          // end cache line aligned?
@@ -165,48 +166,48 @@ SYM_FUNC_START_PI(__inval_dcache_area)
         b.lo    2b
         dsb     sy
         ret
-SYM_FUNC_END_PI(__inval_dcache_area)
+SYM_FUNC_END_PI(dcache_inval_poc)
  SYM_FUNC_END(__dma_inv_area)
  
  /*
- *     __clean_dcache_area_poc(kaddr, size)
+ *     dcache_clean_poc(start, end)
   *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
   *     are cleaned to the PoC.
   *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
   */
  SYM_FUNC_START_LOCAL(__dma_clean_area)
-SYM_FUNC_START_PI(__clean_dcache_area_poc)
+SYM_FUNC_START_PI(dcache_clean_poc)
         /* FALLTHROUGH */
  
  /*
- *     __dma_clean_area(start, size)
+ *     __dma_clean_area(start, end)
   *     - start   - virtual start address of region
- *     - size    - size in question
+ *     - end     - virtual end address of region
   */
         dcache_by_line_op cvac, sy, x0, x1, x2, x3
         ret
-SYM_FUNC_END_PI(__clean_dcache_area_poc)
+SYM_FUNC_END_PI(dcache_clean_poc)
  SYM_FUNC_END(__dma_clean_area)
  
  /*
- *     __clean_dcache_area_pop(kaddr, size)
+ *     dcache_clean_pop(start, end)
   *
- *     Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *     Ensure that any D-cache lines for the interval [start, end)
   *     are cleaned to the PoP.
   *
- *     - kaddr   - kernel address
- *     - size    - size in question
+ *     - start   - virtual start address of region
+ *     - end     - virtual end address of region
   */
-SYM_FUNC_START_PI(__clean_dcache_area_pop)
+SYM_FUNC_START_PI(dcache_clean_pop)
         alternative_if_not ARM64_HAS_DCPOP
-       b       __clean_dcache_area_poc
+       b       dcache_clean_poc
         alternative_else_nop_endif
         dcache_by_line_op cvap, sy, x0, x1, x2, x3
         ret
-SYM_FUNC_END_PI(__clean_dcache_area_pop)
+SYM_FUNC_END_PI(dcache_clean_pop)
  
  /*
   *     __dma_flush_area(start, size)
@@ -217,6 +218,7 @@ SYM_FUNC_END_PI(__clean_dcache_area_pop)
   *     - size    - size in question
   */
  SYM_FUNC_START_PI(__dma_flush_area)
+       add     x1, x0, x1
         dcache_by_line_op civac, sy, x0, x1, x2, x3
         ret
  SYM_FUNC_END_PI(__dma_flush_area)
@@ -228,6 +230,7 @@ SYM_FUNC_END_PI(__dma_flush_area)
   *     - dir   - DMA direction
   */
  SYM_FUNC_START_PI(__dma_map_area)
+       add     x1, x0, x1
         cmp     w2, #DMA_FROM_DEVICE
         b.eq    __dma_inv_area
         b       __dma_clean_area
@@ -240,6 +243,7 @@ SYM_FUNC_END_PI(__dma_map_area)
   *     - dir   - DMA direction
   */
  SYM_FUNC_START_PI(__dma_unmap_area)
+       add     x1, x0, x1
         cmp     w2, #DMA_TO_DEVICE
         b.ne    __dma_inv_area
         ret
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c

index bd9a0bb..d0f972a 100644 (file)
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -841,13 +841,6 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
  }
  NOKPROBE_SYMBOL(do_mem_abort);
  
-void do_el0_irq_bp_hardening(void)
-{
-       /* PC has already been checked in entry.S */
-       arm64_apply_bp_hardening();
-}
-NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
-
  void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
  {
         arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c

index 6d44c02..2aaf950 100644 (file)
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -14,28 +14,25 @@
  #include <asm/cache.h>
  #include <asm/tlbflush.h>
  
-void sync_icache_aliases(void *kaddr, unsigned long len)
+void sync_icache_aliases(unsigned long start, unsigned long end)
  {
-       unsigned long addr = (unsigned long)kaddr;
-
         if (icache_is_aliasing()) {
-               __clean_dcache_area_pou(kaddr, len);
-               __flush_icache_all();
+               dcache_clean_pou(start, end);
+               icache_inval_all_pou();
         } else {
                 /*
                  * Don't issue kick_all_cpus_sync() after I-cache invalidation
                  * for user mappings.
                  */
-               __flush_icache_range(addr, addr + len);
+               caches_clean_inval_pou(start, end);
         }
  }
  
-static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
-                               unsigned long uaddr, void *kaddr,
-                               unsigned long len)
+static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start,
+                               unsigned long end)
  {
         if (vma->vm_flags & VM_EXEC)
-               sync_icache_aliases(kaddr, len);
+               sync_icache_aliases(start, end);
  }
  
  /*
@@ -48,7 +45,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
                        unsigned long len)
  {
         memcpy(dst, src, len);
-       flush_ptrace_access(vma, page, uaddr, dst, len);
+       flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len);
  }
  
  void __sync_icache_dcache(pte_t pte)
@@ -56,7 +53,9 @@ void __sync_icache_dcache(pte_t pte)
         struct page *page = pte_page(pte);
  
         if (!test_bit(PG_dcache_clean, &page->flags)) {
-               sync_icache_aliases(page_address(page), page_size(page));
+               sync_icache_aliases((unsigned long)page_address(page),
+                                   (unsigned long)page_address(page) +
+                                           page_size(page));
                 set_bit(PG_dcache_clean, &page->flags);
         }
  }
@@ -77,20 +76,20 @@ EXPORT_SYMBOL(flush_dcache_page);
  /*
   * Additional functions defined in assembly.
   */
-EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(caches_clean_inval_pou);
  
  #ifdef CONFIG_ARCH_HAS_PMEM_API
  void arch_wb_cache_pmem(void *addr, size_t size)
  {
         /* Ensure order against any prior non-cacheable writes */
         dmb(osh);
-       __clean_dcache_area_pop(addr, size);
+       dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size);
  }
  EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
  
  void arch_invalidate_pmem(void *addr, size_t size)
  {
-       __inval_dcache_area(addr, size);
+       dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size);
  }
  EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
  #endif
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S

index 6e640fa..9b0ad5b 100644 (file)
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -81,11 +81,7 @@ SYM_FUNC_START(cpu_do_suspend)
         mrs     x9, mdscr_el1
         mrs     x10, oslsr_el1
         mrs     x11, sctlr_el1
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-       mrs     x12, tpidr_el1
-alternative_else
-       mrs     x12, tpidr_el2
-alternative_endif
+       get_this_cpu_offset x12
         mrs     x13, sp_el0
         stp     x2, x3, [x0]
         stp     x4, x5, [x0, #16]
@@ -143,11 +139,7 @@ SYM_FUNC_START(cpu_do_resume)
         msr     mdscr_el1, x10
  
         msr     sctlr_el1, x12
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-       msr     tpidr_el1, x13
-alternative_else
-       msr     tpidr_el2, x13
-alternative_endif
+       set_this_cpu_offset x13
         msr     sp_el0, x14
         /*
          * Restore oslsr_el1 by writing oslar_el1
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c

index f7b1948..dd5000d 100644 (file)
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -16,6 +16,7 @@
  #include <asm/byteorder.h>
  #include <asm/cacheflush.h>
  #include <asm/debug-monitors.h>
+#include <asm/insn.h>
  #include <asm/set_memory.h>
  
  #include "bpf_jit.h"
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps

index 21fbdda..49305c2 100644 (file)
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -3,7 +3,8 @@
  # Internal CPU capabilities constants, keep this list sorted
  
  BTI
-HAS_32BIT_EL0
+# Unreliable: use system_supports_32bit_el0() instead.
+HAS_32BIT_EL0_DO_NOT_USE
  HAS_32BIT_EL1
  HAS_ADDRESS_AUTH
  HAS_ADDRESS_AUTH_ARCH
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c

index 3c1c5da..e3da38e 100644 (file)
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -335,10 +335,15 @@ int psci_cpu_suspend_enter(u32 state)
  {
         int ret;
  
-       if (!psci_power_state_loses_context(state))
+       if (!psci_power_state_loses_context(state)) {
+               struct arm_cpuidle_irq_context context;
+
+               arm_cpuidle_save_irq_context(&context);
                 ret = psci_ops.cpu_suspend(state, 0);
-       else
+               arm_cpuidle_restore_irq_context(&context);
+       } else {
                 ret = cpu_suspend(state, psci_suspend_finisher);
+       }
  
         return ret;
  }
diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c

index 9df8e7f..9362938 100644 (file)
--- a/mm/kasan/sw_tags.c
+++ b/mm/kasan/sw_tags.c
@@ -207,3 +207,10 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
  
         return &alloc_meta->free_track[i];
  }
+
+void kasan_tag_mismatch(unsigned long addr, unsigned long access_info,
+                       unsigned long ret_ip)
+{
+       kasan_report(addr, 1 << (access_info & 0xf), access_info & 0x10,
+                    ret_ip);
+}
diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan

index 3d79190..801c415 100644 (file)
--- a/scripts/Makefile.kasan
+++ b/scripts/Makefile.kasan
@@ -50,6 +50,7 @@ endif
  CFLAGS_KASAN := -fsanitize=kernel-hwaddress \
                 $(call cc-param,hwasan-instrument-stack=$(stack_enable)) \
                 $(call cc-param,hwasan-use-short-granules=0) \
+               $(call cc-param,hwasan-inline-all-checks=0) \
                 $(instrumentation_flags)
  
  endif # CONFIG_KASAN_SW_TAGS
diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh

index 45e8aa3..cb55878 100755 (executable)
--- a/scripts/tools-support-relr.sh
+++ b/scripts/tools-support-relr.sh
@@ -7,7 +7,8 @@ trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT
  cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1
  void *p = &p;
  END
-$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
+$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr \
+  --use-android-relr-tags -o $tmp_file
  
  # Despite printing an error message, GNU nm still exits with exit code 0 if it
  # sees a relr section. So we need to check that nothing is printed to stderr.
author	Will Deacon <will@kernel.org>
	Thu, 24 Jun 2021 13:04:33 +0000 (14:04 +0100)
committer	Will Deacon <will@kernel.org>
	Thu, 24 Jun 2021 13:04:33 +0000 (14:04 +0100)
Documentation/arm64/booting.rst		patch \| blob \| history
Makefile		patch \| blob \| history
arch/arm/include/asm/cpuidle.h		patch \| blob \| history
arch/arm64/include/asm/alternative-macros.h		patch \| blob \| history
arch/arm64/include/asm/arch_gicv3.h		patch \| blob \| history
arch/arm64/include/asm/asm-prototypes.h		patch \| blob \| history
arch/arm64/include/asm/assembler.h		patch \| blob \| history
arch/arm64/include/asm/cacheflush.h		patch \| blob \| history
arch/arm64/include/asm/cpu.h		patch \| blob \| history
arch/arm64/include/asm/cpufeature.h		patch \| blob \| history
arch/arm64/include/asm/cpuidle.h		patch \| blob \| history
arch/arm64/include/asm/efi.h		patch \| blob \| history
arch/arm64/include/asm/exception.h		patch \| blob \| history
arch/arm64/include/asm/insn-def.h	[new file with mode: 0644]	patch \| blob
arch/arm64/include/asm/insn.h		patch \| blob \| history
arch/arm64/include/asm/kvm_asm.h		patch \| blob \| history
arch/arm64/include/asm/kvm_mmu.h		patch \| blob \| history
arch/arm64/include/asm/linkage.h		patch \| blob \| history
arch/arm64/include/asm/module.lds.h		patch \| blob \| history
arch/arm64/include/asm/patching.h	[new file with mode: 0644]	patch \| blob
arch/arm64/include/asm/processor.h		patch \| blob \| history
arch/arm64/include/asm/scs.h		patch \| blob \| history
arch/arm64/include/asm/sdei.h		patch \| blob \| history
arch/arm64/include/asm/smp.h		patch \| blob \| history
arch/arm64/include/asm/stacktrace.h		patch \| blob \| history
arch/arm64/kernel/Makefile		patch \| blob \| history
arch/arm64/kernel/alternative.c		patch \| blob \| history
arch/arm64/kernel/asm-offsets.c		patch \| blob \| history
arch/arm64/kernel/cpufeature.c		patch \| blob \| history
arch/arm64/kernel/cpuinfo.c		patch \| blob \| history
arch/arm64/kernel/efi-entry.S		patch \| blob \| history
arch/arm64/kernel/entry-common.c		patch \| blob \| history
arch/arm64/kernel/entry.S		patch \| blob \| history
arch/arm64/kernel/ftrace.c		patch \| blob \| history
arch/arm64/kernel/head.S		patch \| blob \| history
arch/arm64/kernel/hibernate-asm.S		patch \| blob \| history
arch/arm64/kernel/hibernate.c		patch \| blob \| history
arch/arm64/kernel/idle.c	[new file with mode: 0644]	patch \| blob
arch/arm64/kernel/idreg-override.c		patch \| blob \| history
arch/arm64/kernel/image-vars.h		patch \| blob \| history
arch/arm64/kernel/insn.c	[deleted file]	patch \| blob \| history
arch/arm64/kernel/jump_label.c		patch \| blob \| history
arch/arm64/kernel/kaslr.c		patch \| blob \| history
arch/arm64/kernel/kgdb.c		patch \| blob \| history
arch/arm64/kernel/machine_kexec.c		patch \| blob \| history
arch/arm64/kernel/patching.c	[new file with mode: 0644]	patch \| blob
arch/arm64/kernel/perf_callchain.c		patch \| blob \| history
arch/arm64/kernel/probes/kprobes.c		patch \| blob \| history
arch/arm64/kernel/probes/simulate-insn.c		patch \| blob \| history
arch/arm64/kernel/probes/uprobes.c		patch \| blob \| history
arch/arm64/kernel/process.c		patch \| blob \| history
arch/arm64/kernel/ptrace.c		patch \| blob \| history
arch/arm64/kernel/sdei.c		patch \| blob \| history
arch/arm64/kernel/setup.c		patch \| blob \| history
arch/arm64/kernel/signal.c		patch \| blob \| history
arch/arm64/kernel/smp.c		patch \| blob \| history
arch/arm64/kernel/smp_spin_table.c		patch \| blob \| history
arch/arm64/kernel/stacktrace.c		patch \| blob \| history
arch/arm64/kernel/suspend.c		patch \| blob \| history
arch/arm64/kernel/sys_compat.c		patch \| blob \| history
arch/arm64/kernel/traps.c		patch \| blob \| history
arch/arm64/kvm/arm.c		patch \| blob \| history
arch/arm64/kvm/hyp/nvhe/cache.S		patch \| blob \| history
arch/arm64/kvm/hyp/nvhe/setup.c		patch \| blob \| history
arch/arm64/kvm/hyp/nvhe/tlb.c		patch \| blob \| history
arch/arm64/kvm/hyp/pgtable.c		patch \| blob \| history
arch/arm64/lib/Makefile		patch \| blob \| history
arch/arm64/lib/clear_user.S		patch \| blob \| history
arch/arm64/lib/insn.c	[new file with mode: 0644]	patch \| blob
arch/arm64/lib/kasan_sw_tags.S	[new file with mode: 0644]	patch \| blob
arch/arm64/lib/memchr.S		patch \| blob \| history
arch/arm64/lib/memcmp.S		patch \| blob \| history
arch/arm64/lib/memcpy.S		patch \| blob \| history
arch/arm64/lib/memmove.S	[deleted file]	patch \| blob \| history
arch/arm64/lib/strcmp.S		patch \| blob \| history
arch/arm64/lib/strlen.S		patch \| blob \| history
arch/arm64/lib/strncmp.S		patch \| blob \| history
arch/arm64/lib/uaccess_flushcache.c		patch \| blob \| history
arch/arm64/mm/cache.S		patch \| blob \| history
arch/arm64/mm/fault.c		patch \| blob \| history
arch/arm64/mm/flush.c		patch \| blob \| history
arch/arm64/mm/proc.S		patch \| blob \| history
arch/arm64/net/bpf_jit_comp.c		patch \| blob \| history
arch/arm64/tools/cpucaps		patch \| blob \| history
drivers/firmware/psci/psci.c		patch \| blob \| history
mm/kasan/sw_tags.c		patch \| blob \| history
scripts/Makefile.kasan		patch \| blob \| history
scripts/tools-support-relr.sh		patch \| blob \| history