Merge tag 'arc-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
author	Linus Torvalds <torvalds@linux-foundation.org>
Sun, 5 Sep 2021 18:43:03 +0000 (11:43 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Sun, 5 Sep 2021 18:43:03 +0000 (11:43 -0700)
Pull ARC updates from Vineet Gupta:
 "Finally a big pile of changes for ARC (atomics/mm). These are from our
  internal arc64 tree, preparing mainline for eventual arc64 support.
  I'm spreading them out to avoid a tsunami of patches in one release.

   - MM rework:
       - Implement up to 4 paging levels
       - Enable STRICT_MM_TYPECHECKS
       - switch pgtable_t back to 'struct page *'

   - Atomics rework / implement relaxed accessors

   - Retire legacy MMUv1,v2; ARC750 cores

   - A few other fixes: build errors, typos"
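
Note on "relaxed accessors" (context, not part of the pull request text): a relaxed
atomic guarantees only the atomicity of the read-modify-write itself, with no ordering
against surrounding accesses; the generic layer behind <linux/atomic.h> derives the
acquire/release/fully-ordered flavours from the arch's _relaxed primitives. A minimal
usage sketch with the common kernel API:

    #include <linux/atomic.h>

    static atomic_t seq = ATOMIC_INIT(0);

    int next_seq_relaxed(void)
    {
            /* atomic bump, but no ordering against surrounding accesses */
            return atomic_add_return_relaxed(1, &seq);
    }

    int next_seq_ordered(void)
    {
            /*
             * fully ordered variant: built by <linux/atomic.h> from the
             * arch's arch_atomic_add_return_relaxed() plus barriers
             */
            return atomic_add_return(1, &seq);
    }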

* tag 'arc-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc: (33 commits)
  ARC: mm: vmalloc sync from kernel to user table to update PMD ...
  ARC: mm: support 4 levels of page tables
  ARC: mm: support 3 levels of page tables
  ARC: mm: switch to asm-generic/pgalloc.h
  ARC: mm: switch pgtable_t back to struct page *
  ARC: mm: hack to allow 2 level build with 4 level code
  ARC: mm: disintegrate pgtable.h into levels and flags
  ARC: mm: disintegrate mmu.h (arcv2 bits out)
  ARC: mm: move MMU specific bits out of entry code ...
  ARC: mm: move MMU specific bits out of ASID allocator
  ARC: mm: non-functional code movement/cleanup
  ARC: mm: pmd_populate* to use the canonical set_pmd (and drop pmd_set)
  ARC: ioremap: use more commonly used PAGE_KERNEL based uncached flag
  ARC: mm: Enable STRICT_MM_TYPECHECKS
  ARC: mm: Fixes to allow STRICT_MM_TYPECHECKS
  ARC: mm: move mmu/cache externs out to setup.h
  ARC: mm: remove tlb paranoid code
  ARC: mm: use SCRATCH_DATA0 register for caching pgdir in ARCv2 only
  ARC: retire MMUv1 and MMUv2 support
  ARC: retire ARC750 support
  ...

33 files changed:
arch/arc/Kconfig
arch/arc/include/asm/atomic-llsc.h [new file with mode: 0644]
arch/arc/include/asm/atomic-spinlock.h [new file with mode: 0644]
arch/arc/include/asm/atomic.h
arch/arc/include/asm/atomic64-arcv2.h [new file with mode: 0644]
arch/arc/include/asm/bitops.h
arch/arc/include/asm/cache.h
arch/arc/include/asm/cmpxchg.h
arch/arc/include/asm/entry-compact.h
arch/arc/include/asm/hugepage.h
arch/arc/include/asm/mmu-arcv2.h [new file with mode: 0644]
arch/arc/include/asm/mmu.h
arch/arc/include/asm/mmu_context.h
arch/arc/include/asm/page.h
arch/arc/include/asm/pgalloc.h
arch/arc/include/asm/pgtable-bits-arcv2.h [new file with mode: 0644]
arch/arc/include/asm/pgtable-levels.h [new file with mode: 0644]
arch/arc/include/asm/pgtable.h
arch/arc/include/asm/processor.h
arch/arc/include/asm/setup.h
arch/arc/include/asm/smp.h
arch/arc/include/asm/tlb-mmu1.h [deleted file]
arch/arc/kernel/entry-arcv2.S
arch/arc/kernel/entry.S
arch/arc/kernel/intc-compact.c
arch/arc/kernel/smp.c
arch/arc/kernel/stacktrace.c
arch/arc/mm/cache.c
arch/arc/mm/fault.c
arch/arc/mm/init.c
arch/arc/mm/ioremap.c
arch/arc/mm/tlb.c
arch/arc/mm/tlbex.S

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index b5bf68e..f631a7a 100644
@@ -116,16 +116,9 @@ choice
        default ARC_CPU_770 if ISA_ARCOMPACT
        default ARC_CPU_HS if ISA_ARCV2
 
-if ISA_ARCOMPACT
-
-config ARC_CPU_750D
-       bool "ARC750D"
-       select ARC_CANT_LLSC
-       help
-         Support for ARC750 core
-
 config ARC_CPU_770
        bool "ARC770"
+       depends on ISA_ARCOMPACT
        select ARC_HAS_SWAPE
        help
          Support for ARC770 core introduced with Rel 4.10 (Summer 2011)
@@ -135,8 +128,6 @@ config ARC_CPU_770
          -Caches: New Prog Model, Region Flush
          -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr
 
-endif #ISA_ARCOMPACT
-
 config ARC_CPU_HS
        bool "ARC-HS"
        depends on ISA_ARCV2
@@ -274,33 +265,17 @@ config ARC_DCCM_BASE
 
 choice
        prompt "MMU Version"
-       default ARC_MMU_V3 if ARC_CPU_770
-       default ARC_MMU_V2 if ARC_CPU_750D
-       default ARC_MMU_V4 if ARC_CPU_HS
-
-if ISA_ARCOMPACT
-
-config ARC_MMU_V1
-       bool "MMU v1"
-       help
-         Orig ARC700 MMU
-
-config ARC_MMU_V2
-       bool "MMU v2"
-       help
-         Fixed the deficiency of v1 - possible thrashing in memcpy scenario
-         when 2 D-TLB and 1 I-TLB entries index into same 2way set.
+       default ARC_MMU_V3 if ISA_ARCOMPACT
+       default ARC_MMU_V4 if ISA_ARCV2
 
 config ARC_MMU_V3
        bool "MMU v3"
-       depends on ARC_CPU_770
+       depends on ISA_ARCOMPACT
        help
          Introduced with ARC700 4.10: New Features
          Variable Page size (1k-16k), var JTLB size 128 x (2 or 4)
          Shared Address Spaces (SASID)
 
-endif
-
 config ARC_MMU_V4
        bool "MMU v4"
        depends on ISA_ARCV2
@@ -319,7 +294,6 @@ config ARC_PAGE_SIZE_8K
 
 config ARC_PAGE_SIZE_16K
        bool "16KB"
-       depends on ARC_MMU_V3 || ARC_MMU_V4
 
 config ARC_PAGE_SIZE_4K
        bool "4KB"
@@ -340,6 +314,10 @@ config ARC_HUGEPAGE_16M
 
 endchoice
 
+config PGTABLE_LEVELS
+       int "Number of Page table levels"
+       default 2
+
 config ARC_COMPACT_IRQ_LEVELS
        depends on ISA_ARCOMPACT
        bool "Setup Timer IRQ as high Priority"
@@ -563,9 +541,6 @@ config ARC_DW2_UNWIND
          If you don't debug the kernel, you can say N, but we may not be able
          to solve problems without frame unwind information
 
-config ARC_DBG_TLB_PARANOIA
-       bool "Paranoia Checks in Low Level TLB Handlers"
-
 config ARC_DBG_JUMP_LABEL
        bool "Paranoid checks in Static Keys (jump labels) code"
        depends on JUMP_LABEL
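
The PGTABLE_LEVELS knob added above feeds the generic page-table code: for fewer
levels the asm-generic headers fold the unused levels away, so a generic walk compiles
unchanged whether ARC is built with 2, 3 or 4 levels. A rough illustration using the
common kernel API (not taken from this series):

    #include <linux/mm.h>

    static pte_t *lookup_pte(struct mm_struct *mm, unsigned long addr)
    {
            pgd_t *pgd = pgd_offset(mm, addr);
            p4d_t *p4d = p4d_offset(pgd, addr);     /* folded when PGTABLE_LEVELS < 5 */
            pud_t *pud = pud_offset(p4d, addr);     /* folded when PGTABLE_LEVELS < 4 */
            pmd_t *pmd = pmd_offset(pud, addr);     /* folded when PGTABLE_LEVELS < 3 */

            if (pmd_none(*pmd) || pmd_bad(*pmd))
                    return NULL;

            return pte_offset_kernel(pmd, addr);
    }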
diff --git a/arch/arc/include/asm/atomic-llsc.h b/arch/arc/include/asm/atomic-llsc.h
new file mode 100644
index 0000000..088d348
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARC_ATOMIC_LLSC_H
+#define _ASM_ARC_ATOMIC_LLSC_H
+
+#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+
+#define ATOMIC_OP(op, c_op, asm_op)                                    \
+static inline void arch_atomic_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned int val;                                               \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:     llock   %[val], [%[ctr]]                \n"             \
+       "       " #asm_op " %[val], %[val], %[i]        \n"             \
+       "       scond   %[val], [%[ctr]]                \n"             \
+       "       bnz     1b                              \n"             \
+       : [val] "=&r"   (val) /* Early clobber to prevent reg reuse */  \
+       : [ctr] "r"     (&v->counter), /* Not "m": llock only supports reg direct addr mode */  \
+         [i]   "ir"    (i)                                             \
+       : "cc");                                                        \
+}                                                                      \
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op)                             \
+static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)        \
+{                                                                      \
+       unsigned int val;                                               \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:     llock   %[val], [%[ctr]]                \n"             \
+       "       " #asm_op " %[val], %[val], %[i]        \n"             \
+       "       scond   %[val], [%[ctr]]                \n"             \
+       "       bnz     1b                              \n"             \
+       : [val] "=&r"   (val)                                           \
+       : [ctr] "r"     (&v->counter),                                  \
+         [i]   "ir"    (i)                                             \
+       : "cc");                                                        \
+                                                                       \
+       return val;                                                     \
+}
+
+#define arch_atomic_add_return_relaxed         arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed         arch_atomic_sub_return_relaxed
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{                                                                      \
+       unsigned int val, orig;                                         \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:     llock   %[orig], [%[ctr]]               \n"             \
+       "       " #asm_op " %[val], %[orig], %[i]       \n"             \
+       "       scond   %[val], [%[ctr]]                \n"             \
+       "       bnz     1b                              \n"             \
+       : [val] "=&r"   (val),                                          \
+         [orig] "=&r" (orig)                                           \
+       : [ctr] "r"     (&v->counter),                                  \
+         [i]   "ir"    (i)                                             \
+       : "cc");                                                        \
+                                                                       \
+       return orig;                                                    \
+}
+
+#define arch_atomic_fetch_add_relaxed          arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed          arch_atomic_fetch_sub_relaxed
+
+#define arch_atomic_fetch_and_relaxed          arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_andnot_relaxed       arch_atomic_fetch_andnot_relaxed
+#define arch_atomic_fetch_or_relaxed           arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed          arch_atomic_fetch_xor_relaxed
+
+#define ATOMIC_OPS(op, c_op, asm_op)                                   \
+       ATOMIC_OP(op, c_op, asm_op)                                     \
+       ATOMIC_OP_RETURN(op, c_op, asm_op)                              \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(add, +=, add)
+ATOMIC_OPS(sub, -=, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)                                   \
+       ATOMIC_OP(op, c_op, asm_op)                                     \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define arch_atomic_andnot             arch_atomic_andnot
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#endif
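
For reference, ATOMIC_OPS(add, +=, add) above makes the preprocessor emit roughly the
following for the plain (void) op; the llock/scond pair retries until the
store-conditional succeeds, and the _relaxed return/fetch variants intentionally omit
smp_mb(), leaving ordering to the generic wrappers:

    static inline void arch_atomic_add(int i, atomic_t *v)
    {
            unsigned int val;

            __asm__ __volatile__(
            "1:     llock   %[val], [%[ctr]]        \n"     /* load-locked v->counter */
            "       add     %[val], %[val], %[i]    \n"
            "       scond   %[val], [%[ctr]]        \n"     /* store-conditional */
            "       bnz     1b                      \n"     /* retry if scond failed */
            : [val] "=&r" (val)
            : [ctr] "r" (&v->counter), [i] "ir" (i)
            : "cc");
    }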
diff --git a/arch/arc/include/asm/atomic-spinlock.h b/arch/arc/include/asm/atomic-spinlock.h
new file mode 100644
index 0000000..2c83034
--- /dev/null
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARC_ATOMIC_SPLOCK_H
+#define _ASM_ARC_ATOMIC_SPLOCK_H
+
+/*
+ * Non hardware assisted Atomic-R-M-W
+ * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
+ */
+
+static inline void arch_atomic_set(atomic_t *v, int i)
+{
+       /*
+        * Independent of hardware support, all of the atomic_xxx() APIs need
+        * to follow the same locking rules to make sure that a "hardware"
+        * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
+        * sequence
+        *
+        * Thus atomic_set() despite being 1 insn (and seemingly atomic)
+        * requires the locking.
+        */
+       unsigned long flags;
+
+       atomic_ops_lock(flags);
+       WRITE_ONCE(v->counter, i);
+       atomic_ops_unlock(flags);
+}
+
+#define arch_atomic_set_release(v, i)  arch_atomic_set((v), (i))
+
+#define ATOMIC_OP(op, c_op, asm_op)                                    \
+static inline void arch_atomic_##op(int i, atomic_t *v)                        \
+{                                                                      \
+       unsigned long flags;                                            \
+                                                                       \
+       atomic_ops_lock(flags);                                         \
+       v->counter c_op i;                                              \
+       atomic_ops_unlock(flags);                                       \
+}
+
+#define ATOMIC_OP_RETURN(op, c_op, asm_op)                             \
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)                \
+{                                                                      \
+       unsigned long flags;                                            \
+       unsigned int temp;                                              \
+                                                                       \
+       /*                                                              \
+        * spin lock/unlock provides the needed smp_mb() before/after   \
+        */                                                             \
+       atomic_ops_lock(flags);                                         \
+       temp = v->counter;                                              \
+       temp c_op i;                                                    \
+       v->counter = temp;                                              \
+       atomic_ops_unlock(flags);                                       \
+                                                                       \
+       return temp;                                                    \
+}
+
+#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)           \
+{                                                                      \
+       unsigned long flags;                                            \
+       unsigned int orig;                                              \
+                                                                       \
+       /*                                                              \
+        * spin lock/unlock provides the needed smp_mb() before/after   \
+        */                                                             \
+       atomic_ops_lock(flags);                                         \
+       orig = v->counter;                                              \
+       v->counter c_op i;                                              \
+       atomic_ops_unlock(flags);                                       \
+                                                                       \
+       return orig;                                                    \
+}
+
+#define ATOMIC_OPS(op, c_op, asm_op)                                   \
+       ATOMIC_OP(op, c_op, asm_op)                                     \
+       ATOMIC_OP_RETURN(op, c_op, asm_op)                              \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(add, +=, add)
+ATOMIC_OPS(sub, -=, sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, c_op, asm_op)                                   \
+       ATOMIC_OP(op, c_op, asm_op)                                     \
+       ATOMIC_FETCH_OP(op, c_op, asm_op)
+
+ATOMIC_OPS(and, &=, and)
+ATOMIC_OPS(andnot, &= ~, bic)
+ATOMIC_OPS(or, |=, or)
+ATOMIC_OPS(xor, ^=, xor)
+
+#define arch_atomic_andnot             arch_atomic_andnot
+#define arch_atomic_fetch_andnot       arch_atomic_fetch_andnot
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#endif
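
atomic_ops_lock()/atomic_ops_unlock() used above are supplied elsewhere by the arch
(asm/smp.h on ARC); conceptually they reduce to IRQ disabling on UP and one global
spinlock plus IRQ disabling on SMP, roughly as sketched below (from memory, not part
of this diff):

    #ifdef CONFIG_SMP
    /* one global lock serializes every emulated atomic RMW */
    extern arch_spinlock_t smp_atomic_ops_lock;

    #define atomic_ops_lock(flags)  do {                    \
            local_irq_save(flags);                          \
            arch_spin_lock(&smp_atomic_ops_lock);           \
    } while (0)

    #define atomic_ops_unlock(flags) do {                   \
            arch_spin_unlock(&smp_atomic_ops_lock);         \
            local_irq_restore(flags);                       \
    } while (0)
    #else
    /* UP: disabling interrupts is sufficient for exclusion */
    #define atomic_ops_lock(flags)          local_irq_save(flags)
    #define atomic_ops_unlock(flags)        local_irq_restore(flags)
    #endif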
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 7a36d79..52ee51e 100644
 #define arch_atomic_read(v)  READ_ONCE((v)->counter)
 
 #ifdef CONFIG_ARC_HAS_LLSC
-
-#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
-#define ATOMIC_OP(op, c_op, asm_op)                                    \
-static inline void arch_atomic_##op(int i, atomic_t *v)                        \
-{                                                                      \
-       unsigned int val;                                               \
-                                                                       \
-       __asm__ __volatile__(                                           \
-       "1:     llock   %[val], [%[ctr]]                \n"             \
-       "       " #asm_op " %[val], %[val], %[i]        \n"             \
-       "       scond   %[val], [%[ctr]]                \n"             \
-       "       bnz     1b                              \n"             \
-       : [val] "=&r"   (val) /* Early clobber to prevent reg reuse */  \
-       : [ctr] "r"     (&v->counter), /* Not "m": llock only supports reg direct addr mode */  \
-         [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
-}                                                                      \
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)                             \
-static inline int arch_atomic_##op##_return(int i, atomic_t *v)                \
-{                                                                      \
-       unsigned int val;                                               \
-                                                                       \
-       /*                                                              \
-        * Explicit full memory barrier needed before/after as          \
-        * LLOCK/SCOND themselves don't provide any such semantics      \
-        */                                                             \
-       smp_mb();                                                       \
-                                                                       \
-       __asm__ __volatile__(                                           \
-       "1:     llock   %[val], [%[ctr]]                \n"             \
-       "       " #asm_op " %[val], %[val], %[i]        \n"             \
-       "       scond   %[val], [%[ctr]]                \n"             \
-       "       bnz     1b                              \n"             \
-       : [val] "=&r"   (val)                                           \
-       : [ctr] "r"     (&v->counter),                                  \
-         [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
-                                                                       \
-       smp_mb();                                                       \
-                                                                       \
-       return val;                                                     \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v)           \
-{                                                                      \
-       unsigned int val, orig;                                         \
-                                                                       \
-       /*                                                              \
-        * Explicit full memory barrier needed before/after as          \
-        * LLOCK/SCOND themselves don't provide any such semantics      \
-        */                                                             \
-       smp_mb();                                                       \
-                                                                       \
-       __asm__ __volatile__(                                           \
-       "1:     llock   %[orig], [%[ctr]]               \n"             \
-       "       " #asm_op " %[val], %[orig], %[i]       \n"             \
-       "       scond   %[val], [%[ctr]]                \n"             \
-       "       bnz     1b                              \n"             \
-       : [val] "=&r"   (val),                                          \
-         [orig] "=&r" (orig)                                           \
-       : [ctr] "r"     (&v->counter),                                  \
-         [i]   "ir"    (i)                                             \
-       : "cc");                                                        \
-                                                                       \
-       smp_mb();                                                       \
-                                                                       \
-       return orig;                                                    \
-}
-
-#else  /* !CONFIG_ARC_HAS_LLSC */
-
-#ifndef CONFIG_SMP
-
- /* violating atomic_xxx API locking protocol in UP for optimization sake */
-#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
+#include <asm/atomic-llsc.h>
 #else
+#include <asm/atomic-spinlock.h>
+#endif
 
-static inline void arch_atomic_set(atomic_t *v, int i)
-{
-       /*
-        * Independent of hardware support, all of the atomic_xxx() APIs need
-        * to follow the same locking rules to make sure that a "hardware"
-        * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn
-        * sequence
-        *
-        * Thus atomic_set() despite being 1 insn (and seemingly atomic)
-        * requires the locking.
-        */
-       unsigned long flags;
+#define arch_atomic_cmpxchg(v, o, n)                                   \
+({                                                                     \
+       arch_cmpxchg(&((v)->counter), (o), (n));                        \
+})
 
-       atomic_ops_lock(flags);
-       WRITE_ONCE(v->counter, i);
-       atomic_ops_unlock(flags);
-}
+#ifdef arch_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_relaxed(v, o, n)                           \
+({                                                                     \
+       arch_cmpxchg_relaxed(&((v)->counter), (o), (n));                \
+})
+#endif
 
-#define arch_atomic_set_release(v, i)  arch_atomic_set((v), (i))
+#define arch_atomic_xchg(v, n)                                         \
+({                                                                     \
+       arch_xchg(&((v)->counter), (n));                                \
+})
 
+#ifdef arch_xchg_relaxed
+#define arch_atomic_xchg_relaxed(v, n)                                 \
+({                                                                     \
+       arch_xchg_relaxed(&((v)->counter), (n));                        \
+})
 #endif
 
 /*
- * Non hardware assisted Atomic-R-M-W
- * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
+ * 64-bit atomics
  */
-
-#define ATOMIC_OP(op, c_op, asm_op)                                    \
-static inline void arch_atomic_##op(int i, atomic_t *v)                        \
-{                                                                      \
-       unsigned long flags;                                            \
-                                                                       \
-       atomic_ops_lock(flags);                                         \
-       v->counter c_op i;                                              \
-       atomic_ops_unlock(flags);                                       \
-}
-
-#define ATOMIC_OP_RETURN(op, c_op, asm_op)                             \
-static inline int arch_atomic_##op##_return(int i, atomic_t *v)                \
-{                                                                      \
-       unsigned long flags;                                            \
-       unsigned long temp;                                             \
-                                                                       \
-       /*                                                              \
-        * spin lock/unlock provides the needed smp_mb() before/after   \
-        */                                                             \
-       atomic_ops_lock(flags);                                         \
-       temp = v->counter;                                              \
-       temp c_op i;                                                    \
-       v->counter = temp;                                              \
-       atomic_ops_unlock(flags);                                       \
-                                                                       \
-       return temp;                                                    \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op, asm_op)                              \
-static inline int arch_atomic_fetch_##op(int i, atomic_t *v)           \
-{                                                                      \
-       unsigned long flags;                                            \
-       unsigned long orig;                                             \
-                                                                       \
-       /*                                                              \
-        * spin lock/unlock provides the needed smp_mb() before/after   \
-        */                                                             \
-       atomic_ops_lock(flags);                                         \
-       orig = v->counter;                                              \
-       v->counter c_op i;                                              \
-       atomic_ops_unlock(flags);                                       \
-                                                                       \
-       return orig;                                                    \
-}
-
-#endif /* !CONFIG_ARC_HAS_LLSC */
-
-#define ATOMIC_OPS(op, c_op, asm_op)                                   \
-       ATOMIC_OP(op, c_op, asm_op)                                     \
-       ATOMIC_OP_RETURN(op, c_op, asm_op)                              \
-       ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(add, +=, add)
-ATOMIC_OPS(sub, -=, sub)
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op, c_op, asm_op)                                   \
-       ATOMIC_OP(op, c_op, asm_op)                                     \
-       ATOMIC_FETCH_OP(op, c_op, asm_op)
-
-ATOMIC_OPS(and, &=, and)
-ATOMIC_OPS(andnot, &= ~, bic)
-ATOMIC_OPS(or, |=, or)
-ATOMIC_OPS(xor, ^=, xor)
-
-#define arch_atomic_andnot             arch_atomic_andnot
-#define arch_atomic_fetch_andnot       arch_atomic_fetch_andnot
-
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
 #ifdef CONFIG_GENERIC_ATOMIC64
-
 #include <asm-generic/atomic64.h>
-
-#else  /* Kconfig ensures this is only enabled with needed h/w assist */
-
-/*
- * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
- *  - The address HAS to be 64-bit aligned
- *  - There are 2 semantics involved here:
- *    = exclusive implies no interim update between load/store to same addr
- *    = both words are observed/updated together: this is guaranteed even
- *      for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
- *      is NOT required to use LLOCKD+SCONDD, STD suffices
- */
-
-typedef struct {
-       s64 __aligned(8) counter;
-} atomic64_t;
-
-#define ATOMIC64_INIT(a) { (a) }
-
-static inline s64 arch_atomic64_read(const atomic64_t *v)
-{
-       s64 val;
-
-       __asm__ __volatile__(
-       "       ldd   %0, [%1]  \n"
-       : "=r"(val)
-       : "r"(&v->counter));
-
-       return val;
-}
-
-static inline void arch_atomic64_set(atomic64_t *v, s64 a)
-{
-       /*
-        * This could have been a simple assignment in "C" but would need
-        * explicit volatile. Otherwise gcc optimizers could elide the store
-        * which borked atomic64 self-test
-        * In the inline asm version, memory clobber needed for exact same
-        * reason, to tell gcc about the store.
-        *
-        * This however is not needed for sibling atomic64_add() etc since both
-        * load/store are explicitly done in inline asm. As long as API is used
-        * for each access, gcc has no way to optimize away any load/store
-        */
-       __asm__ __volatile__(
-       "       std   %0, [%1]  \n"
-       :
-       : "r"(a), "r"(&v->counter)
-       : "memory");
-}
-
-#define ATOMIC64_OP(op, op1, op2)                                      \
-static inline void arch_atomic64_##op(s64 a, atomic64_t *v)            \
-{                                                                      \
-       s64 val;                                                        \
-                                                                       \
-       __asm__ __volatile__(                                           \
-       "1:                             \n"                             \
-       "       llockd  %0, [%1]        \n"                             \
-       "       " #op1 " %L0, %L0, %L2  \n"                             \
-       "       " #op2 " %H0, %H0, %H2  \n"                             \
-       "       scondd   %0, [%1]       \n"                             \
-       "       bnz     1b              \n"                             \
-       : "=&r"(val)                                                    \
-       : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");                                                        \
-}                                                                      \
-
-#define ATOMIC64_OP_RETURN(op, op1, op2)                               \
-static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v)    \
-{                                                                      \
-       s64 val;                                                        \
-                                                                       \
-       smp_mb();                                                       \
-                                                                       \
-       __asm__ __volatile__(                                           \
-       "1:                             \n"                             \
-       "       llockd   %0, [%1]       \n"                             \
-       "       " #op1 " %L0, %L0, %L2  \n"                             \
-       "       " #op2 " %H0, %H0, %H2  \n"                             \
-       "       scondd   %0, [%1]       \n"                             \
-       "       bnz     1b              \n"                             \
-       : [val] "=&r"(val)                                              \
-       : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");        /* memory clobber comes from smp_mb() */        \
-                                                                       \
-       smp_mb();                                                       \
-                                                                       \
-       return val;                                                     \
-}
-
-#define ATOMIC64_FETCH_OP(op, op1, op2)                                        \
-static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v)       \
-{                                                                      \
-       s64 val, orig;                                                  \
-                                                                       \
-       smp_mb();                                                       \
-                                                                       \
-       __asm__ __volatile__(                                           \
-       "1:                             \n"                             \
-       "       llockd   %0, [%2]       \n"                             \
-       "       " #op1 " %L1, %L0, %L3  \n"                             \
-       "       " #op2 " %H1, %H0, %H3  \n"                             \
-       "       scondd   %1, [%2]       \n"                             \
-       "       bnz     1b              \n"                             \
-       : "=&r"(orig), "=&r"(val)                                       \
-       : "r"(&v->counter), "ir"(a)                                     \
-       : "cc");        /* memory clobber comes from smp_mb() */        \
-                                                                       \
-       smp_mb();                                                       \
-                                                                       \
-       return orig;                                                    \
-}
-
-#define ATOMIC64_OPS(op, op1, op2)                                     \
-       ATOMIC64_OP(op, op1, op2)                                       \
-       ATOMIC64_OP_RETURN(op, op1, op2)                                \
-       ATOMIC64_FETCH_OP(op, op1, op2)
-
-ATOMIC64_OPS(add, add.f, adc)
-ATOMIC64_OPS(sub, sub.f, sbc)
-ATOMIC64_OPS(and, and, and)
-ATOMIC64_OPS(andnot, bic, bic)
-ATOMIC64_OPS(or, or, or)
-ATOMIC64_OPS(xor, xor, xor)
-
-#define arch_atomic64_andnot           arch_atomic64_andnot
-#define arch_atomic64_fetch_andnot     arch_atomic64_fetch_andnot
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
-
-static inline s64
-arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
-{
-       s64 prev;
-
-       smp_mb();
-
-       __asm__ __volatile__(
-       "1:     llockd  %0, [%1]        \n"
-       "       brne    %L0, %L2, 2f    \n"
-       "       brne    %H0, %H2, 2f    \n"
-       "       scondd  %3, [%1]        \n"
-       "       bnz     1b              \n"
-       "2:                             \n"
-       : "=&r"(prev)
-       : "r"(ptr), "ir"(expected), "r"(new)
-       : "cc");        /* memory clobber comes from smp_mb() */
-
-       smp_mb();
-
-       return prev;
-}
-
-static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
-{
-       s64 prev;
-
-       smp_mb();
-
-       __asm__ __volatile__(
-       "1:     llockd  %0, [%1]        \n"
-       "       scondd  %2, [%1]        \n"
-       "       bnz     1b              \n"
-       "2:                             \n"
-       : "=&r"(prev)
-       : "r"(ptr), "r"(new)
-       : "cc");        /* memory clobber comes from smp_mb() */
-
-       smp_mb();
-
-       return prev;
-}
-
-/**
- * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-
-static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
-{
-       s64 val;
-
-       smp_mb();
-
-       __asm__ __volatile__(
-       "1:     llockd  %0, [%1]        \n"
-       "       sub.f   %L0, %L0, 1     # w0 - 1, set C on borrow\n"
-       "       sub.c   %H0, %H0, 1     # if C set, w1 - 1\n"
-       "       brlt    %H0, 0, 2f      \n"
-       "       scondd  %0, [%1]        \n"
-       "       bnz     1b              \n"
-       "2:                             \n"
-       : "=&r"(val)
-       : "r"(&v->counter)
-       : "cc");        /* memory clobber comes from smp_mb() */
-
-       smp_mb();
-
-       return val;
-}
-#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-
-/**
- * arch_atomic64_fetch_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if it was not @u.
- * Returns the old value of @v
- */
-static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
-{
-       s64 old, temp;
-
-       smp_mb();
-
-       __asm__ __volatile__(
-       "1:     llockd  %0, [%2]        \n"
-       "       brne    %L0, %L4, 2f    # continue to add since v != u \n"
-       "       breq.d  %H0, %H4, 3f    # return since v == u \n"
-       "2:                             \n"
-       "       add.f   %L1, %L0, %L3   \n"
-       "       adc     %H1, %H0, %H3   \n"
-       "       scondd  %1, [%2]        \n"
-       "       bnz     1b              \n"
-       "3:                             \n"
-       : "=&r"(old), "=&r" (temp)
-       : "r"(&v->counter), "r"(a), "r"(u)
-       : "cc");        /* memory clobber comes from smp_mb() */
-
-       smp_mb();
-
-       return old;
-}
-#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
-
-#endif /* !CONFIG_GENERIC_ATOMIC64 */
+#else
+#include <asm/atomic64-arcv2.h>
+#endif
 
 #endif /* !__ASSEMBLY__ */
 
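With only the _relaxed primitives defined by the arch, the fully ordered variants used
by common code are generated by the fallback layer behind <linux/atomic.h>, which
brackets the relaxed op with full barriers; approximately (generated names simplified):

    static inline int arch_atomic_fetch_add(int i, atomic_t *v)
    {
            int ret;

            smp_mb__before_atomic();        /* full barrier before the RMW */
            ret = arch_atomic_fetch_add_relaxed(i, v);
            smp_mb__after_atomic();         /* full barrier after the RMW */

            return ret;
    }
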
diff --git a/arch/arc/include/asm/atomic64-arcv2.h b/arch/arc/include/asm/atomic64-arcv2.h
new file mode 100644
index 0000000..c5a8010
--- /dev/null
@@ -0,0 +1,250 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
+ *  - The address HAS to be 64-bit aligned
+ */
+
+#ifndef _ASM_ARC_ATOMIC64_ARCV2_H
+#define _ASM_ARC_ATOMIC64_ARCV2_H
+
+typedef struct {
+       s64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(a) { (a) }
+
+static inline s64 arch_atomic64_read(const atomic64_t *v)
+{
+       s64 val;
+
+       __asm__ __volatile__(
+       "       ldd   %0, [%1]  \n"
+       : "=r"(val)
+       : "r"(&v->counter));
+
+       return val;
+}
+
+static inline void arch_atomic64_set(atomic64_t *v, s64 a)
+{
+       /*
+        * This could have been a simple assignment in "C" but would need
+        * explicit volatile. Otherwise gcc optimizers could elide the store
+        * which borked atomic64 self-test
+        * In the inline asm version, memory clobber needed for exact same
+        * reason, to tell gcc about the store.
+        *
+        * This however is not needed for sibling atomic64_add() etc since both
+        * load/store are explicitly done in inline asm. As long as API is used
+        * for each access, gcc has no way to optimize away any load/store
+        */
+       __asm__ __volatile__(
+       "       std   %0, [%1]  \n"
+       :
+       : "r"(a), "r"(&v->counter)
+       : "memory");
+}
+
+#define ATOMIC64_OP(op, op1, op2)                                      \
+static inline void arch_atomic64_##op(s64 a, atomic64_t *v)            \
+{                                                                      \
+       s64 val;                                                        \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:                             \n"                             \
+       "       llockd  %0, [%1]        \n"                             \
+       "       " #op1 " %L0, %L0, %L2  \n"                             \
+       "       " #op2 " %H0, %H0, %H2  \n"                             \
+       "       scondd   %0, [%1]       \n"                             \
+       "       bnz     1b              \n"                             \
+       : "=&r"(val)                                                    \
+       : "r"(&v->counter), "ir"(a)                                     \
+       : "cc");                                                        \
+}                                                                      \
+
+#define ATOMIC64_OP_RETURN(op, op1, op2)                               \
+static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)    \
+{                                                                      \
+       s64 val;                                                        \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:                             \n"                             \
+       "       llockd   %0, [%1]       \n"                             \
+       "       " #op1 " %L0, %L0, %L2  \n"                             \
+       "       " #op2 " %H0, %H0, %H2  \n"                             \
+       "       scondd   %0, [%1]       \n"                             \
+       "       bnz     1b              \n"                             \
+       : [val] "=&r"(val)                                              \
+       : "r"(&v->counter), "ir"(a)                                     \
+       : "cc");        /* memory clobber comes from smp_mb() */        \
+                                                                       \
+       return val;                                                     \
+}
+
+#define arch_atomic64_add_return_relaxed       arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed       arch_atomic64_sub_return_relaxed
+
+#define ATOMIC64_FETCH_OP(op, op1, op2)                                        \
+static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)     \
+{                                                                      \
+       s64 val, orig;                                                  \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:                             \n"                             \
+       "       llockd   %0, [%2]       \n"                             \
+       "       " #op1 " %L1, %L0, %L3  \n"                             \
+       "       " #op2 " %H1, %H0, %H3  \n"                             \
+       "       scondd   %1, [%2]       \n"                             \
+       "       bnz     1b              \n"                             \
+       : "=&r"(orig), "=&r"(val)                                       \
+       : "r"(&v->counter), "ir"(a)                                     \
+       : "cc");        /* memory clobber comes from smp_mb() */        \
+                                                                       \
+       return orig;                                                    \
+}
+
+#define arch_atomic64_fetch_add_relaxed                arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed                arch_atomic64_fetch_sub_relaxed
+
+#define arch_atomic64_fetch_and_relaxed                arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_andnot_relaxed     arch_atomic64_fetch_andnot_relaxed
+#define arch_atomic64_fetch_or_relaxed         arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed                arch_atomic64_fetch_xor_relaxed
+
+#define ATOMIC64_OPS(op, op1, op2)                                     \
+       ATOMIC64_OP(op, op1, op2)                                       \
+       ATOMIC64_OP_RETURN(op, op1, op2)                                \
+       ATOMIC64_FETCH_OP(op, op1, op2)
+
+ATOMIC64_OPS(add, add.f, adc)
+ATOMIC64_OPS(sub, sub.f, sbc)
+
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, op1, op2)                                     \
+       ATOMIC64_OP(op, op1, op2)                                       \
+       ATOMIC64_FETCH_OP(op, op1, op2)
+
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, or, or)
+ATOMIC64_OPS(xor, xor, xor)
+
+#define arch_atomic64_andnot           arch_atomic64_andnot
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline s64
+arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
+{
+       s64 prev;
+
+       smp_mb();
+
+       __asm__ __volatile__(
+       "1:     llockd  %0, [%1]        \n"
+       "       brne    %L0, %L2, 2f    \n"
+       "       brne    %H0, %H2, 2f    \n"
+       "       scondd  %3, [%1]        \n"
+       "       bnz     1b              \n"
+       "2:                             \n"
+       : "=&r"(prev)
+       : "r"(ptr), "ir"(expected), "r"(new)
+       : "cc");        /* memory clobber comes from smp_mb() */
+
+       smp_mb();
+
+       return prev;
+}
+
+static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
+{
+       s64 prev;
+
+       smp_mb();
+
+       __asm__ __volatile__(
+       "1:     llockd  %0, [%1]        \n"
+       "       scondd  %2, [%1]        \n"
+       "       bnz     1b              \n"
+       "2:                             \n"
+       : "=&r"(prev)
+       : "r"(ptr), "r"(new)
+       : "cc");        /* memory clobber comes from smp_mb() */
+
+       smp_mb();
+
+       return prev;
+}
+
+/**
+ * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
+{
+       s64 val;
+
+       smp_mb();
+
+       __asm__ __volatile__(
+       "1:     llockd  %0, [%1]        \n"
+       "       sub.f   %L0, %L0, 1     # w0 - 1, set C on borrow\n"
+       "       sub.c   %H0, %H0, 1     # if C set, w1 - 1\n"
+       "       brlt    %H0, 0, 2f      \n"
+       "       scondd  %0, [%1]        \n"
+       "       bnz     1b              \n"
+       "2:                             \n"
+       : "=&r"(val)
+       : "r"(&v->counter)
+       : "cc");        /* memory clobber comes from smp_mb() */
+
+       smp_mb();
+
+       return val;
+}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
+
+/**
+ * arch_atomic64_fetch_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if it was not @u.
+ * Returns the old value of @v
+ */
+static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+       s64 old, temp;
+
+       smp_mb();
+
+       __asm__ __volatile__(
+       "1:     llockd  %0, [%2]        \n"
+       "       brne    %L0, %L4, 2f    # continue to add since v != u \n"
+       "       breq.d  %H0, %H4, 3f    # return since v == u \n"
+       "2:                             \n"
+       "       add.f   %L1, %L0, %L3   \n"
+       "       adc     %H1, %H0, %H3   \n"
+       "       scondd  %1, [%2]        \n"
+       "       bnz     1b              \n"
+       "3:                             \n"
+       : "=&r"(old), "=&r" (temp)
+       : "r"(&v->counter), "r"(a), "r"(u)
+       : "cc");        /* memory clobber comes from smp_mb() */
+
+       smp_mb();
+
+       return old;
+}
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
+
+#endif
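
As with the 32-bit ops, ordering for the 64-bit _relaxed variants is supplied by the
generic wrappers. ATOMIC64_OPS(add, add.f, adc) expands roughly to the function below;
%L/%H select the low/high 32-bit halves of the 64-bit register pair, and the carry
flag links the two additions:

    static inline void arch_atomic64_add(s64 a, atomic64_t *v)
    {
            s64 val;

            __asm__ __volatile__(
            "1:                             \n"
            "       llockd  %0, [%1]        \n"     /* 64-bit load-locked */
            "       add.f   %L0, %L0, %L2   \n"     /* low word, sets carry */
            "       adc     %H0, %H0, %H2   \n"     /* high word plus carry */
            "       scondd   %0, [%1]       \n"     /* 64-bit store-conditional */
            "       bnz     1b              \n"     /* retry on failure */
            : "=&r"(val)
            : "r"(&v->counter), "ir"(a)
            : "cc");
    }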
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index fb98440..a7daaf6 100644
 
 #include <linux/types.h>
 #include <linux/compiler.h>
-#include <asm/barrier.h>
-#ifndef CONFIG_ARC_HAS_LLSC
-#include <asm/smp.h>
-#endif
-
-#ifdef CONFIG_ARC_HAS_LLSC
-
-/*
- * Hardware assisted Atomic-R-M-W
- */
-
-#define BIT_OP(op, c_op, asm_op)                                       \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{                                                                      \
-       unsigned int temp;                                              \
-                                                                       \
-       m += nr >> 5;                                                   \
-                                                                       \
-       nr &= 0x1f;                                                     \
-                                                                       \
-       __asm__ __volatile__(                                           \
-       "1:     llock       %0, [%1]            \n"                     \
-       "       " #asm_op " %0, %0, %2  \n"                             \
-       "       scond       %0, [%1]            \n"                     \
-       "       bnz         1b                  \n"                     \
-       : "=&r"(temp)   /* Early clobber, to prevent reg reuse */       \
-       : "r"(m),       /* Not "m": llock only supports reg direct addr mode */ \
-         "ir"(nr)                                                      \
-       : "cc");                                                        \
-}
-
-/*
- * Semantically:
- *    Test the bit
- *    if clear
- *        set it and return 0 (old value)
- *    else
- *        return 1 (old value).
- *
- * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally
- * and the old value of bit is returned
- */
-#define TEST_N_BIT_OP(op, c_op, asm_op)                                        \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{                                                                      \
-       unsigned long old, temp;                                        \
-                                                                       \
-       m += nr >> 5;                                                   \
-                                                                       \
-       nr &= 0x1f;                                                     \
-                                                                       \
-       /*                                                              \
-        * Explicit full memory barrier needed before/after as          \
-        * LLOCK/SCOND themselves don't provide any such smenatic       \
-        */                                                             \
-       smp_mb();                                                       \
-                                                                       \
-       __asm__ __volatile__(                                           \
-       "1:     llock       %0, [%2]    \n"                             \
-       "       " #asm_op " %1, %0, %3  \n"                             \
-       "       scond       %1, [%2]    \n"                             \
-       "       bnz         1b          \n"                             \
-       : "=&r"(old), "=&r"(temp)                                       \
-       : "r"(m), "ir"(nr)                                              \
-       : "cc");                                                        \
-                                                                       \
-       smp_mb();                                                       \
-                                                                       \
-       return (old & (1 << nr)) != 0;                                  \
-}
-
-#else /* !CONFIG_ARC_HAS_LLSC */
-
-/*
- * Non hardware assisted Atomic-R-M-W
- * Locking would change to irq-disabling only (UP) and spinlocks (SMP)
- *
- * There's "significant" micro-optimization in writing our own variants of
- * bitops (over generic variants)
- *
- * (1) The generic APIs have "signed" @nr while we have it "unsigned"
- *     This avoids extra code to be generated for pointer arithmatic, since
- *     is "not sure" that index is NOT -ve
- * (2) Utilize the fact that ARCompact bit fidding insn (BSET/BCLR/ASL) etc
- *     only consider bottom 5 bits of @nr, so NO need to mask them off.
- *     (GCC Quirk: however for constant @nr we still need to do the masking
- *             at compile time)
- */
-
-#define BIT_OP(op, c_op, asm_op)                                       \
-static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\
-{                                                                      \
-       unsigned long temp, flags;                                      \
-       m += nr >> 5;                                                   \
-                                                                       \
-       /*                                                              \
-        * spin lock/unlock provide the needed smp_mb() before/after    \
-        */                                                             \
-       bitops_lock(flags);                                             \
-                                                                       \
-       temp = *m;                                                      \
-       *m = temp c_op (1UL << (nr & 0x1f));                                    \
-                                                                       \
-       bitops_unlock(flags);                                           \
-}
-
-#define TEST_N_BIT_OP(op, c_op, asm_op)                                        \
-static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{                                                                      \
-       unsigned long old, flags;                                       \
-       m += nr >> 5;                                                   \
-                                                                       \
-       bitops_lock(flags);                                             \
-                                                                       \
-       old = *m;                                                       \
-       *m = old c_op (1UL << (nr & 0x1f));                             \
-                                                                       \
-       bitops_unlock(flags);                                           \
-                                                                       \
-       return (old & (1UL << (nr & 0x1f))) != 0;                       \
-}
-
-#endif
-
-/***************************************
- * Non atomic variants
- **************************************/
-
-#define __BIT_OP(op, c_op, asm_op)                                     \
-static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m)   \
-{                                                                      \
-       unsigned long temp;                                             \
-       m += nr >> 5;                                                   \
-                                                                       \
-       temp = *m;                                                      \
-       *m = temp c_op (1UL << (nr & 0x1f));                            \
-}
-
-#define __TEST_N_BIT_OP(op, c_op, asm_op)                              \
-static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\
-{                                                                      \
-       unsigned long old;                                              \
-       m += nr >> 5;                                                   \
-                                                                       \
-       old = *m;                                                       \
-       *m = old c_op (1UL << (nr & 0x1f));                             \
-                                                                       \
-       return (old & (1UL << (nr & 0x1f))) != 0;                       \
-}
-
-#define BIT_OPS(op, c_op, asm_op)                                      \
-                                                                       \
-       /* set_bit(), clear_bit(), change_bit() */                      \
-       BIT_OP(op, c_op, asm_op)                                        \
-                                                                       \
-       /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\
-       TEST_N_BIT_OP(op, c_op, asm_op)                                 \
-                                                                       \
-       /* __set_bit(), __clear_bit(), __change_bit() */                \
-       __BIT_OP(op, c_op, asm_op)                                      \
-                                                                       \
-       /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\
-       __TEST_N_BIT_OP(op, c_op, asm_op)
-
-BIT_OPS(set, |, bset)
-BIT_OPS(clear, & ~, bclr)
-BIT_OPS(change, ^, bxor)
-
-/*
- * This routine doesn't need to be atomic.
- */
-static inline int
-test_bit(unsigned int nr, const volatile unsigned long *addr)
-{
-       unsigned long mask;
-
-       addr += nr >> 5;
-
-       mask = 1UL << (nr & 0x1f);
-
-       return ((mask & *addr) != 0);
-}
 
 #ifdef CONFIG_ISA_ARCOMPACT
 
@@ -296,7 +114,7 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long word)
  * @result: [1-32]
  * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0
  */
-static inline __attribute__ ((const)) int fls(unsigned long x)
+static inline __attribute__ ((const)) int fls(unsigned int x)
 {
        int n;
 
@@ -323,7 +141,7 @@ static inline __attribute__ ((const)) int __fls(unsigned long x)
  * ffs = Find First Set in word (LSB to MSB)
  * @result: [1-32], 0 if all 0's
  */
-static inline __attribute__ ((const)) int ffs(unsigned long x)
+static inline __attribute__ ((const)) int ffs(unsigned int x)
 {
        int n;
 
@@ -368,6 +186,8 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x)
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/sched.h>
 #include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
 
 #include <asm-generic/bitops/find.h>
 #include <asm-generic/bitops/le.h>
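
With the private BIT_OP()/__BIT_OP() generators above removed, set_bit(), test_and_set_bit() and friends now come from asm-generic/bitops/atomic.h and non-atomic.h, layered on the arch atomics. A hedged sketch of that layering (illustrative, not the literal generic header):

    #include <linux/atomic.h>
    #include <linux/bits.h>

    /* Sketch: an atomic bitop expressed via the atomic_long ops, which is
     * roughly what the generic header does under the hood.
     */
    static inline int sketch_test_and_set_bit(unsigned int nr, volatile unsigned long *p)
    {
            unsigned long mask = BIT_MASK(nr);
            long old;

            p += BIT_WORD(nr);      /* advance to the word holding bit nr */
            old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p);
            return !!(old & mask);  /* non-zero if the bit was already set */
    }
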
index d8ece42..f0f1fc5 100644 (file)
 #define ARCH_SLAB_MINALIGN     8
 #endif
 
-extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
-
 extern int ioc_enable;
 extern unsigned long perip_base, perip_end;
 
index d42917e..c5b544a 100644 (file)
@@ -6,6 +6,7 @@
 #ifndef __ASM_ARC_CMPXCHG_H
 #define __ASM_ARC_CMPXCHG_H
 
+#include <linux/build_bug.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-       unsigned long prev;
-
-       /*
-        * Explicit full memory barrier needed before/after as
-        * LLOCK/SCOND themselves don't provide any such semantics
-        */
-       smp_mb();
-
-       __asm__ __volatile__(
-       "1:     llock   %0, [%1]        \n"
-       "       brne    %0, %2, 2f      \n"
-       "       scond   %3, [%1]        \n"
-       "       bnz     1b              \n"
-       "2:                             \n"
-       : "=&r"(prev)   /* Early clobber, to prevent reg reuse */
-       : "r"(ptr),     /* Not "m": llock only supports reg direct addr mode */
-         "ir"(expected),
-         "r"(new)      /* can't be "ir". scond can't take LIMM for "b" */
-       : "cc", "memory"); /* so that gcc knows memory is being written here */
-
-       smp_mb();
-
-       return prev;
-}
-
-#else /* !CONFIG_ARC_HAS_LLSC */
-
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
-{
-       unsigned long flags;
-       int prev;
-       volatile unsigned long *p = ptr;
-
-       /*
-        * spin lock/unlock provide the needed smp_mb() before/after
-        */
-       atomic_ops_lock(flags);
-       prev = *p;
-       if (prev == expected)
-               *p = new;
-       atomic_ops_unlock(flags);
-       return prev;
-}
-
-#endif
+/*
+ * if (*ptr == @old)
+ *      *ptr = @new
+ */
+#define __cmpxchg(ptr, old, new)                                       \
+({                                                                     \
+       __typeof__(*(ptr)) _prev;                                       \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "1:     llock  %0, [%1] \n"                                     \
+       "       brne   %0, %2, 2f       \n"                             \
+       "       scond  %3, [%1] \n"                                     \
+       "       bnz     1b              \n"                             \
+       "2:                             \n"                             \
+       : "=&r"(_prev)  /* Early clobber prevent reg reuse */           \
+       : "r"(ptr),     /* Not "m": llock only supports reg */          \
+         "ir"(old),                                                    \
+         "r"(new)      /* Not "ir": scond can't take LIMM */           \
+       : "cc",                                                         \
+         "memory");    /* gcc knows memory is clobbered */             \
+                                                                       \
+       _prev;                                                          \
+})
 
-#define arch_cmpxchg(ptr, o, n) ({                     \
-       (typeof(*(ptr)))__cmpxchg((ptr),                \
-                                 (unsigned long)(o),   \
-                                 (unsigned long)(n));  \
+#define arch_cmpxchg_relaxed(ptr, old, new)                            \
+({                                                                     \
+       __typeof__(ptr) _p_ = (ptr);                                    \
+       __typeof__(*(ptr)) _o_ = (old);                                 \
+       __typeof__(*(ptr)) _n_ = (new);                                 \
+       __typeof__(*(ptr)) _prev_;                                      \
+                                                                       \
+       switch(sizeof((_p_))) {                                         \
+       case 4:                                                         \
+               _prev_ = __cmpxchg(_p_, _o_, _n_);                      \
+               break;                                                  \
+       default:                                                        \
+               BUILD_BUG();                                            \
+       }                                                               \
+       _prev_;                                                         \
 })
 
-/*
- * atomic_cmpxchg is same as cmpxchg
- *   LLSC: only different in data-type, semantics are exactly same
- *  !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
- *         semantics, and this lock also happens to be used by atomic_*()
- */
-#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
+#else
 
+#define arch_cmpxchg(ptr, old, new)                                    \
+({                                                                     \
+       volatile __typeof__(ptr) _p_ = (ptr);                           \
+       __typeof__(*(ptr)) _o_ = (old);                                 \
+       __typeof__(*(ptr)) _n_ = (new);                                 \
+       __typeof__(*(ptr)) _prev_;                                      \
+       unsigned long __flags;                                          \
+                                                                       \
+       BUILD_BUG_ON(sizeof(_p_) != 4);                                 \
+                                                                       \
+       /*                                                              \
+        * spin lock/unlock provide the needed smp_mb() before/after    \
+        */                                                             \
+       atomic_ops_lock(__flags);                                       \
+       _prev_ = *_p_;                                                  \
+       if (_prev_ == _o_)                                              \
+               *_p_ = _n_;                                             \
+       atomic_ops_unlock(__flags);                                     \
+       _prev_;                                                         \
+})
+
+#endif
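
Note that with LLSC only the _relaxed variant is defined; the fully ordered cmpxchg() seen by callers is synthesized by the generic atomic fallbacks, which is why the explicit smp_mb() pair of the old __cmpxchg() could be dropped. A hedged sketch of what that fallback amounts to (illustrative, not the generated code):

    #define sketch_cmpxchg(ptr, old, new)                                  \
    ({                                                                     \
            __typeof__(*(ptr)) __ret;                                      \
            smp_mb__before_atomic();        /* full barrier before */      \
            __ret = arch_cmpxchg_relaxed((ptr), (old), (new));             \
            smp_mb__after_atomic();         /* full barrier after  */      \
            __ret;                                                         \
    })
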
 
 /*
- * xchg (reg with memory) based on "Native atomic" EX insn
+ * xchg
  */
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
-                                  int size)
-{
-       extern unsigned long __xchg_bad_pointer(void);
-
-       switch (size) {
-       case 4:
-               smp_mb();
-
-               __asm__ __volatile__(
-               "       ex  %0, [%1]    \n"
-               : "+r"(val)
-               : "r"(ptr)
-               : "memory");
+#ifdef CONFIG_ARC_HAS_LLSC
 
-               smp_mb();
+#define __xchg(ptr, val)                                               \
+({                                                                     \
+       __asm__ __volatile__(                                           \
+       "       ex  %0, [%1]    \n"     /* set new value */             \
+       : "+r"(val)                                                     \
+       : "r"(ptr)                                                      \
+       : "memory");                                                    \
+       _val_;          /* get old value */                             \
+})
 
-               return val;
-       }
-       return __xchg_bad_pointer();
-}
+#define arch_xchg_relaxed(ptr, val)                                    \
+({                                                                     \
+       __typeof__(ptr) _p_ = (ptr);                                    \
+       __typeof__(*(ptr)) _val_ = (val);                               \
+                                                                       \
+       switch(sizeof(*(_p_))) {                                        \
+       case 4:                                                         \
+               _val_ = __xchg(_p_, _val_);                             \
+               break;                                                  \
+       default:                                                        \
+               BUILD_BUG();                                            \
+       }                                                               \
+       _val_;                                                          \
+})
 
-#define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \
-                                                sizeof(*(ptr))))
+#else  /* !CONFIG_ARC_HAS_LLSC */
 
 /*
- * xchg() maps directly to ARC EX instruction which guarantees atomicity.
- * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock
- * due to a subtle reason:
- *  - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot
- *    of  kernel code which calls xchg()/cmpxchg() on same data (see llist.h)
- *    Hence xchg() needs to follow same locking rules.
- *
- * Technically the lock is also needed for UP (boils down to irq save/restore)
- * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
- * Other way around, xchg is one instruction anyways, so can't be interrupted
- * as such
+ * The EX instruction is baseline and present in !LLSC builds too. But in
+ * that regime it still needs to use the @atomic_ops_lock spinlock to allow
+ * interop with cmpxchg(), which uses the spinlock in !LLSC
+ * (llist.h uses xchg and cmpxchg on the same data)
  */
 
-#if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
-
-#define arch_xchg(ptr, with)           \
-({                                     \
-       unsigned long flags;            \
-       typeof(*(ptr)) old_val;         \
-                                       \
-       atomic_ops_lock(flags);         \
-       old_val = _xchg(ptr, with);     \
-       atomic_ops_unlock(flags);       \
-       old_val;                        \
+#define arch_xchg(ptr, val)                                            \
+({                                                                     \
+       __typeof__(ptr) _p_ = (ptr);                                    \
+       __typeof__(*(ptr)) _val_ = (val);                               \
+                                                                       \
+       unsigned long __flags;                                          \
+                                                                       \
+       atomic_ops_lock(__flags);                                       \
+                                                                       \
+       __asm__ __volatile__(                                           \
+       "       ex  %0, [%1]    \n"                                     \
+       : "+r"(_val_)                                                   \
+       : "r"(_p_)                                                      \
+       : "memory");                                                    \
+                                                                       \
+       atomic_ops_unlock(__flags);                                     \
+       _val_;                                                          \
 })
 
-#else
-
-#define arch_xchg(ptr, with)  _xchg(ptr, with)
-
 #endif
 
-/*
- * "atomic" variant of xchg()
- * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following definition
- * is incorrect. But here's the rationale:
- *   SMP : Even xchg() takes the atomic_ops_lock, so OK.
- *   LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
- *         is natively "SMP safe", no serialization required).
- *   UP  : other atomics disable IRQ, so no way a difft ctxt atomic_xchg()
- *         could clobber them. atomic_xchg() itself would be 1 insn, so it
- *         can't be clobbered by others. Thus no serialization required when
- *         atomic_xchg is involved.
- */
-#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
-
 #endif
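
A hypothetical usage sketch of the rewritten, type-safe macros (variable and function names here are illustrative, not kernel code; assumes <linux/atomic.h>). Operands must be 32-bit, anything else now fails the build via BUILD_BUG()/BUILD_BUG_ON():

    #include <linux/atomic.h>

    static int demo_state;                          /* illustrative variable */

    static void demo_xchg_cmpxchg(void)
    {
            int prev = xchg(&demo_state, 1);        /* returns previous value */

            if (prev == 0)
                    cmpxchg(&demo_state, 1, 2);     /* 1 -> 2 only if still 1 */
    }
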
index 6dbf5ce..5aab4f9 100644 (file)
  * to be saved again on kernel mode stack, as part of pt_regs.
  *-------------------------------------------------------------*/
 .macro PROLOG_FREEUP_REG       reg, mem
-#ifndef ARC_USE_SCRATCH_REG
-       sr  \reg, [ARC_REG_SCRATCH_DATA0]
-#else
        st  \reg, [\mem]
-#endif
 .endm
 
 .macro PROLOG_RESTORE_REG      reg, mem
-#ifndef ARC_USE_SCRATCH_REG
-       lr  \reg, [ARC_REG_SCRATCH_DATA0]
-#else
        ld  \reg, [\mem]
-#endif
 .endm
 
 /*--------------------------------------------------------------
index 4eef17c..11b0ff2 100644 (file)
@@ -58,14 +58,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
                                 pmd_t *pmd);
 
-/* Generic variants assume pgtable_t is struct page *, hence need for these */
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-                                      pgtable_t pgtable);
-
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
 extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
                                unsigned long end);
diff --git a/arch/arc/include/asm/mmu-arcv2.h b/arch/arc/include/asm/mmu-arcv2.h
new file mode 100644 (file)
index 0000000..ed9036d
--- /dev/null
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com)
+ *
+ * MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed.
+ * This file contains the TLB access registers and commands
+ */
+
+#ifndef _ASM_ARC_MMU_ARCV2_H
+#define _ASM_ARC_MMU_ARCV2_H
+
+/*
+ * TLB Management regs
+ */
+#define ARC_REG_MMU_BCR                0x06f
+
+#ifdef CONFIG_ARC_MMU_V3
+#define ARC_REG_TLBPD0         0x405
+#define ARC_REG_TLBPD1         0x406
+#define ARC_REG_TLBPD1HI       0       /* Dummy: allows common code */
+#define ARC_REG_TLBINDEX       0x407
+#define ARC_REG_TLBCOMMAND     0x408
+#define ARC_REG_PID            0x409
+#define ARC_REG_SCRATCH_DATA0  0x418
+#else
+#define ARC_REG_TLBPD0         0x460
+#define ARC_REG_TLBPD1         0x461
+#define ARC_REG_TLBPD1HI       0x463
+#define ARC_REG_TLBINDEX       0x464
+#define ARC_REG_TLBCOMMAND     0x465
+#define ARC_REG_PID            0x468
+#define ARC_REG_SCRATCH_DATA0  0x46c
+#endif
+
+/* Bits in MMU PID reg */
+#define __TLB_ENABLE           (1 << 31)
+#define __PROG_ENABLE          (1 << 30)
+#define MMU_ENABLE             (__TLB_ENABLE | __PROG_ENABLE)
+
+/* Bits in TLB Index reg */
+#define TLB_LKUP_ERR           0x80000000
+
+#ifdef CONFIG_ARC_MMU_V3
+#define TLB_DUP_ERR            (TLB_LKUP_ERR | 0x00000001)
+#else
+#define TLB_DUP_ERR            (TLB_LKUP_ERR | 0x40000000)
+#endif
+
+/*
+ * TLB Commands
+ */
+#define TLBWrite               0x1
+#define TLBRead                0x2
+#define TLBGetIndex            0x3
+#define TLBProbe               0x4
+#define TLBWriteNI             0x5  /* write JTLB without inv uTLBs */
+#define TLBIVUTLB              0x6  /* explicitly inv uTLBs */
+
+#ifdef CONFIG_ARC_MMU_V4
+#define TLBInsertEntry         0x7
+#define TLBDeleteEntry         0x8
+#endif
+
+/* Masks for actual TLB "PD"s */
+#define PTE_BITS_IN_PD0                (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
+#define PTE_BITS_RWX           (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+
+#define PTE_BITS_NON_RWX_IN_PD1        (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+extern int pae40_exist_but_not_enab(void);
+
+static inline int is_pae40_enabled(void)
+{
+       return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
+}
+
+static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid)
+{
+       write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE);
+}
+
+static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd)
+{
+       /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
+#ifdef CONFIG_ISA_ARCV2
+       write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd);
+#endif
+}
+
+#else
+
+.macro ARC_MMU_REENABLE reg
+       lr \reg, [ARC_REG_PID]
+       or \reg, \reg, MMU_ENABLE
+       sr \reg, [ARC_REG_PID]
+.endm
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
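
For orientation, a hedged sketch of how these registers and commands are typically driven, modelled loosely on arch/arc/mm/tlb.c (simplified, not the exact sequence there):

    static inline void sketch_tlb_entry_insert(unsigned int pd0, unsigned long pd1)
    {
            write_aux_reg(ARC_REG_TLBPD0, pd0);     /* vaddr | ASID | ctrl bits */
            write_aux_reg(ARC_REG_TLBPD1, pd1);     /* paddr | cache/perm bits */
    #ifdef CONFIG_ARC_MMU_V4
            write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);  /* hw picks the slot */
    #else
            write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);     /* pick a victim slot */
            write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);        /* commit the entry */
    #endif
    }
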
index 26b731d..ca427c3 100644 (file)
@@ -7,98 +7,15 @@
 #define _ASM_ARC_MMU_H
 
 #ifndef __ASSEMBLY__
-#include <linux/threads.h>     /* NR_CPUS */
-#endif
-
-#if defined(CONFIG_ARC_MMU_V1)
-#define CONFIG_ARC_MMU_VER 1
-#elif defined(CONFIG_ARC_MMU_V2)
-#define CONFIG_ARC_MMU_VER 2
-#elif defined(CONFIG_ARC_MMU_V3)
-#define CONFIG_ARC_MMU_VER 3
-#elif defined(CONFIG_ARC_MMU_V4)
-#define CONFIG_ARC_MMU_VER 4
-#endif
-
-/* MMU Management regs */
-#define ARC_REG_MMU_BCR                0x06f
-#if (CONFIG_ARC_MMU_VER < 4)
-#define ARC_REG_TLBPD0         0x405
-#define ARC_REG_TLBPD1         0x406
-#define ARC_REG_TLBPD1HI       0       /* Dummy: allows code sharing with ARC700 */
-#define ARC_REG_TLBINDEX       0x407
-#define ARC_REG_TLBCOMMAND     0x408
-#define ARC_REG_PID            0x409
-#define ARC_REG_SCRATCH_DATA0  0x418
-#else
-#define ARC_REG_TLBPD0         0x460
-#define ARC_REG_TLBPD1         0x461
-#define ARC_REG_TLBPD1HI       0x463
-#define ARC_REG_TLBINDEX       0x464
-#define ARC_REG_TLBCOMMAND     0x465
-#define ARC_REG_PID            0x468
-#define ARC_REG_SCRATCH_DATA0  0x46c
-#endif
-
-#if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP)
-#define        ARC_USE_SCRATCH_REG
-#endif
-
-/* Bits in MMU PID register */
-#define __TLB_ENABLE           (1 << 31)
-#define __PROG_ENABLE          (1 << 30)
-#define MMU_ENABLE             (__TLB_ENABLE | __PROG_ENABLE)
-
-/* Error code if probe fails */
-#define TLB_LKUP_ERR           0x80000000
-
-#if (CONFIG_ARC_MMU_VER < 4)
-#define TLB_DUP_ERR    (TLB_LKUP_ERR | 0x00000001)
-#else
-#define TLB_DUP_ERR    (TLB_LKUP_ERR | 0x40000000)
-#endif
-
-/* TLB Commands */
-#define TLBWrite    0x1
-#define TLBRead     0x2
-#define TLBGetIndex 0x3
-#define TLBProbe    0x4
-
-#if (CONFIG_ARC_MMU_VER >= 2)
-#define TLBWriteNI  0x5                /* write JTLB without inv uTLBs */
-#define TLBIVUTLB   0x6                /* explicitly inv uTLBs */
-#else
-#define TLBWriteNI  TLBWrite   /* Not present in hardware, fallback */
-#endif
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define TLBInsertEntry 0x7
-#define TLBDeleteEntry 0x8
-#endif
 
-#ifndef __ASSEMBLY__
+#include <linux/threads.h>     /* NR_CPUS */
 
 typedef struct {
        unsigned long asid[NR_CPUS];    /* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
-#else
-#define tlb_paranoid_check(a, b)
 #endif
 
-void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-void read_decode_mmu_bcr(void);
-
-static inline int is_pae40_enabled(void)
-{
-       return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
-}
-
-extern int pae40_exist_but_not_enab(void);
-
-#endif /* !__ASSEMBLY__ */
+#include <asm/mmu-arcv2.h>
 
 #endif
index df16406..dda471f 100644 (file)
 #ifndef _ASM_ARC_MMU_CONTEXT_H
 #define _ASM_ARC_MMU_CONTEXT_H
 
-#include <asm/arcregs.h>
-#include <asm/tlb.h>
 #include <linux/sched/mm.h>
 
+#include <asm/tlb.h>
 #include <asm-generic/mm_hooks.h>
 
-/*             ARC700 ASID Management
+/*             ARC ASID Management
+ *
+ * The MMU tags TLB entries with an 8-bit ASID, avoiding the need to flush the
+ * TLB on context switch.
  *
- * ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries
- * with same vaddr (different tasks) to co-exit. This provides for
- * "Fast Context Switch" i.e. no TLB flush on ctxt-switch
+ * ASIDs are managed per cpu, so threads of a task running on different CPUs
+ * can have different ASIDs. Global ASID management is only needed if hardware
+ * supports TLB shootdown and/or a shared TLB across cores, which ARC doesn't.
  *
- * Linux assigns each task a unique ASID. A simple round-robin allocation
- * of H/w ASID is done using software tracker @asid_cpu.
- * When it reaches max 255, the allocation cycle starts afresh by flushing
- * the entire TLB and wrapping ASID back to zero.
+ * Each task is assigned a unique ASID, with a simple round-robin allocator
+ * tracked in @asid_cpu. When the 8-bit value rolls over, a new cycle is
+ * started from 0 and the TLB is flushed
  *
  * A new allocation cycle, post rollover, could potentially reassign an ASID
  * to a different task. Thus the rule is to refresh the ASID in a new cycle.
@@ -93,7 +94,7 @@ static inline void get_new_mmu_context(struct mm_struct *mm)
        asid_mm(mm, cpu) = asid_cpu(cpu);
 
 set_hw:
-       write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE);
+       mmu_setup_asid(mm, hw_pid(mm, cpu));
 
        local_irq_restore(flags);
 }
@@ -146,10 +147,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
         */
        cpumask_set_cpu(cpu, mm_cpumask(next));
 
-#ifdef ARC_USE_SCRATCH_REG
-       /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */
-       write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
+       mmu_setup_pgd(next, next->pgd);
 
        get_new_mmu_context(next);
 }
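
A hedged sketch of the rollover rule described in the comment above; the real allocator is get_new_mmu_context() in this file, and this is only illustrative (MM_CTXT_ASID_MASK as defined in this header):

    static inline unsigned long sketch_next_asid(unsigned long asid_ctr)
    {
            asid_ctr++;                             /* round-robin allocation */
            if (!(asid_ctr & MM_CTXT_ASID_MASK))    /* 8-bit hw ASID wrapped */
                    local_flush_tlb_all();          /* new cycle: drop stale entries */
            return asid_ctr;                        /* upper bits act as generation */
    }
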
index 4a9d333..9a62e1d 100644 (file)
@@ -34,57 +34,55 @@ void copy_user_highpage(struct page *to, struct page *from,
                        unsigned long u_vaddr, struct vm_area_struct *vma);
 void clear_user_page(void *to, unsigned long u_vaddr, struct page *page);
 
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-/*
- * These are used to make use of C type-checking..
- */
-typedef struct {
-#ifdef CONFIG_ARC_HAS_PAE40
-       unsigned long long pte;
-#else
-       unsigned long pte;
-#endif
-} pte_t;
 typedef struct {
        unsigned long pgd;
 } pgd_t;
+
+#define pgd_val(x)     ((x).pgd)
+#define __pgd(x)       ((pgd_t) { (x) })
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
 typedef struct {
-       unsigned long pgprot;
-} pgprot_t;
+       unsigned long pud;
+} pud_t;
 
-#define pte_val(x)      ((x).pte)
-#define pgd_val(x)      ((x).pgd)
-#define pgprot_val(x)   ((x).pgprot)
+#define pud_val(x)             ((x).pud)
+#define __pud(x)               ((pud_t) { (x) })
 
-#define __pte(x)        ((pte_t) { (x) })
-#define __pgd(x)        ((pgd_t) { (x) })
-#define __pgprot(x)     ((pgprot_t) { (x) })
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
 
-#define pte_pgprot(x) __pgprot(pte_val(x))
+typedef struct {
+       unsigned long pmd;
+} pmd_t;
 
-#else /* !STRICT_MM_TYPECHECKS */
+#define pmd_val(x)     ((x).pmd)
+#define __pmd(x)       ((pmd_t) { (x) })
 
+#endif
+
+typedef struct {
 #ifdef CONFIG_ARC_HAS_PAE40
-typedef unsigned long long pte_t;
+       unsigned long long pte;
 #else
-typedef unsigned long pte_t;
+       unsigned long pte;
 #endif
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
+} pte_t;
 
-#define pte_val(x)     (x)
-#define pgd_val(x)     (x)
-#define pgprot_val(x)  (x)
-#define __pte(x)       (x)
-#define __pgd(x)       (x)
-#define __pgprot(x)    (x)
-#define pte_pgprot(x)  (x)
+#define pte_val(x)     ((x).pte)
+#define __pte(x)       ((pte_t) { (x) })
 
-#endif
+typedef struct {
+       unsigned long pgprot;
+} pgprot_t;
+
+#define pgprot_val(x)  ((x).pgprot)
+#define __pgprot(x)    ((pgprot_t) { (x) })
+#define pte_pgprot(x)  __pgprot(pte_val(x))
 
-typedef pte_t * pgtable_t;
+typedef struct page *pgtable_t;
 
 /*
  * Use virt_to_pfn with caution:
@@ -122,8 +120,8 @@ extern int pfn_valid(unsigned long pfn);
  * virt here means link-address/program-address as embedded in object code.
  * And for ARC, link-addr = physical address
  */
-#define __pa(vaddr)  ((unsigned long)(vaddr))
-#define __va(paddr)  ((void *)((unsigned long)(paddr)))
+#define __pa(vaddr)            ((unsigned long)(vaddr))
+#define __va(paddr)            ((void *)((unsigned long)(paddr)))
 
 #define virt_to_page(kaddr)    pfn_to_page(virt_to_pfn(kaddr))
 #define virt_addr_valid(kaddr)  pfn_valid(virt_to_pfn(kaddr))
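
Since pte_t, pmd_t, pud_t and pgd_t are now distinct struct types in every configuration (STRICT_MM_TYPECHECKS is effectively always on), mixing levels no longer compiles. A small hypothetical illustration:

    static inline void type_check_demo(pmd_t *pmdp, pte_t pte)
    {
            /* *pmdp = pte;   <-- no longer compiles: pmd_t and pte_t differ */
            set_pmd(pmdp, __pmd(pte_val(pte)));     /* explicit conversion needed */
    }
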
index a32ca31..096b8ef 100644 (file)
 
 #include <linux/mm.h>
 #include <linux/log2.h>
+#include <asm-generic/pgalloc.h>
 
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 {
-       pmd_set(pmd, pte);
+       /*
+        * The cast to unsigned long below is OK even in the 32-bit PAE40 regime
+        * with a long long pte: despite the "wider" pte, the pte table itself must
+        * live in non-PAE low memory, as all higher levels can only hold 32-bit
+        * pointers.
+        *
+        * The cast itself is needed given the simplistic definition of set_pmd()
+        */
+       set_pmd(pmd, __pmd((unsigned long)pte));
 }
 
-static inline void
-pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page)
 {
-       pmd_set(pmd, (pte_t *) ptep);
-}
-
-static inline int __get_order_pgd(void)
-{
-       return get_order(PTRS_PER_PGD * sizeof(pgd_t));
+       set_pmd(pmd, __pmd((unsigned long)page_address(pte_page)));
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-       int num, num2;
-       pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd());
+       pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL);
 
        if (ret) {
+               int num, num2;
                num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE;
                memzero(ret, num * sizeof(pgd_t));
 
@@ -68,64 +70,27 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
        return ret;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-       free_pages((unsigned long)pgd, __get_order_pgd());
-}
-
-
-/*
- * With software-only page-tables, addr-split for traversal is tweakable and
- * that directly governs how big tables would be at each level.
- * Further, the MMU page size is configurable.
- * Thus we need to programatically assert the size constraint
- * All of this is const math, allowing gcc to do constant folding/propagation.
- */
+#if CONFIG_PGTABLE_LEVELS > 3
 
-static inline int __get_order_pte(void)
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp)
 {
-       return get_order(PTRS_PER_PTE * sizeof(pte_t));
+       set_p4d(p4dp, __p4d((unsigned long)pudp));
 }
 
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
-       pte_t *pte;
+#define __pud_free_tlb(tlb, pmd, addr)  pud_free((tlb)->mm, pmd)
 
-       pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                        __get_order_pte());
+#endif
 
-       return pte;
-}
+#if CONFIG_PGTABLE_LEVELS > 2
 
-static inline pgtable_t
-pte_alloc_one(struct mm_struct *mm)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
 {
-       pgtable_t pte_pg;
-       struct page *page;
-
-       pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte());
-       if (!pte_pg)
-               return 0;
-       memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
-       page = virt_to_page(pte_pg);
-       if (!pgtable_pte_page_ctor(page)) {
-               __free_page(page);
-               return 0;
-       }
-
-       return pte_pg;
+       set_pud(pudp, __pud((unsigned long)pmdp));
 }
 
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-       free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */
-}
+#define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
 
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
-{
-       pgtable_pte_page_dtor(virt_to_page(ptep));
-       free_pages((unsigned long)ptep, __get_order_pte());
-}
+#endif
 
 #define __pte_free_tlb(tlb, pte, addr)  pte_free((tlb)->mm, pte)
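
With pgtable_t switched back to struct page *, the ARC-private pte_alloc_one()/pte_free() pair is replaced by asm-generic/pgalloc.h. Roughly what the generic user-PTE allocator does (hedged sketch, not the verbatim header):

    static inline pgtable_t sketch_pte_alloc_one(struct mm_struct *mm)
    {
            struct page *page = alloc_page(GFP_PGTABLE_USER);   /* zeroed PTE page */

            if (!page)
                    return NULL;
            if (!pgtable_pte_page_ctor(page)) {     /* accounting + ptl init */
                    __free_page(page);
                    return NULL;
            }
            return page;                            /* pgtable_t is struct page * now */
    }
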
 
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
new file mode 100644 (file)
index 0000000..183d23b
--- /dev/null
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/*
+ * page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS)
+ * These correspond to the matching bits in the hardware TLB entry
+ */
+
+#ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H
+#define _ASM_ARC_PGTABLE_BITS_ARCV2_H
+
+#ifdef CONFIG_ARC_CACHE_PAGES
+#define _PAGE_CACHEABLE                (1 << 0)  /* Cached (H) */
+#else
+#define _PAGE_CACHEABLE                0
+#endif
+
+#define _PAGE_EXECUTE          (1 << 1)  /* User Execute  (H) */
+#define _PAGE_WRITE            (1 << 2)  /* User Write    (H) */
+#define _PAGE_READ             (1 << 3)  /* User Read     (H) */
+#define _PAGE_ACCESSED         (1 << 4)  /* Accessed      (s) */
+#define _PAGE_DIRTY            (1 << 5)  /* Modified      (s) */
+#define _PAGE_SPECIAL          (1 << 6)
+#define _PAGE_GLOBAL           (1 << 8)  /* ASID agnostic (H) */
+#define _PAGE_PRESENT          (1 << 9)  /* PTE/TLB Valid (H) */
+
+#ifdef CONFIG_ARC_MMU_V4
+#define _PAGE_HW_SZ            (1 << 10)  /* Normal/super (H) */
+#else
+#define _PAGE_HW_SZ            0
+#endif
+
+/* Defaults for every user page */
+#define ___DEF         (_PAGE_PRESENT | _PAGE_CACHEABLE)
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+                                                          _PAGE_SPECIAL)
+
+/* More abbreviated helpers */
+#define PAGE_U_NONE     __pgprot(___DEF)
+#define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
+#define PAGE_U_W_R      __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
+#define PAGE_U_X_R      __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
+#define PAGE_U_X_W_R    __pgprot(___DEF \
+                               | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
+#define PAGE_KERNEL     __pgprot(___DEF | _PAGE_GLOBAL \
+                               | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE)
+
+#define PAGE_SHARED    PAGE_U_W_R
+
+#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
+
+/*
+ * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
+ *
+ * Certain cases have 1:1 mapping
+ *  e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
+ *       which directly corresponds to  PAGE_U_X_R
+ *
+ * Other rules which cause the divergence from 1:1 mapping
+ *
+ *  1. Although ARC700 can do exclusive execute/write protection (meaning R
+ *     can be tracked independently of X/W unlike some other CPUs), still to
+ *     keep things consistent with other archs:
+ *      -Write implies Read:   W => R
+ *      -Execute implies Read: X => R
+ *
+ *  2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
+ *     This is to enable COW mechanism
+ */
+       /* xwr */
+#define __P000  PAGE_U_NONE
+#define __P001  PAGE_U_R
+#define __P010  PAGE_U_R       /* Pvt-W => !W */
+#define __P011  PAGE_U_R       /* Pvt-W => !W */
+#define __P100  PAGE_U_X_R     /* X => R */
+#define __P101  PAGE_U_X_R
+#define __P110  PAGE_U_X_R     /* Pvt-W => !W and X => R */
+#define __P111  PAGE_U_X_R     /* Pvt-W => !W */
+
+#define __S000  PAGE_U_NONE
+#define __S001  PAGE_U_R
+#define __S010  PAGE_U_W_R     /* W => R */
+#define __S011  PAGE_U_W_R
+#define __S100  PAGE_U_X_R     /* X => R */
+#define __S101  PAGE_U_X_R
+#define __S110  PAGE_U_X_W_R   /* X => R */
+#define __S111  PAGE_U_X_W_R
+
+#ifndef __ASSEMBLY__
+
+#define pte_write(pte)         (pte_val(pte) & _PAGE_WRITE)
+#define pte_dirty(pte)         (pte_val(pte) & _PAGE_DIRTY)
+#define pte_young(pte)         (pte_val(pte) & _PAGE_ACCESSED)
+#define pte_special(pte)       (pte_val(pte) & _PAGE_SPECIAL)
+
+#define PTE_BIT_FUNC(fn, op) \
+       static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
+
+PTE_BIT_FUNC(mknotpresent,     &= ~(_PAGE_PRESENT));
+PTE_BIT_FUNC(wrprotect,        &= ~(_PAGE_WRITE));
+PTE_BIT_FUNC(mkwrite,  |= (_PAGE_WRITE));
+PTE_BIT_FUNC(mkclean,  &= ~(_PAGE_DIRTY));
+PTE_BIT_FUNC(mkdirty,  |= (_PAGE_DIRTY));
+PTE_BIT_FUNC(mkold,    &= ~(_PAGE_ACCESSED));
+PTE_BIT_FUNC(mkyoung,  |= (_PAGE_ACCESSED));
+PTE_BIT_FUNC(mkspecial,        |= (_PAGE_SPECIAL));
+PTE_BIT_FUNC(mkhuge,   |= (_PAGE_HW_SZ));
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+       return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+                             pte_t *ptep, pte_t pteval)
+{
+       set_pte(ptep, pteval);
+}
+
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+                     pte_t *ptep);
+
+/* Encode swap {type,off} tuple into PTE
+ * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
+ * PAGE_PRESENT is zero in a PTE holding swap "identifier"
+ */
+#define __swp_entry(type, off)         ((swp_entry_t) \
+                                       { ((type) & 0x1f) | ((off) << 13) })
+
+/* Decode a PTE containing swap "identifier" into constituents */
+#define __swp_type(pte_lookalike)      (((pte_lookalike).val) & 0x1f)
+#define __swp_offset(pte_lookalike)    ((pte_lookalike).val >> 13)
+
+#define __pte_to_swp_entry(pte)                ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)          ((pte_t) { (x).val })
+
+#define kern_addr_valid(addr)  (1)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <asm/hugepage.h>
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif
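
A worked example of the swap encoding above (illustrative; assumes swp_entry_t from <linux/swapops.h> and BUG_ON from <linux/bug.h>):

    static inline void swp_encode_demo(void)
    {
            /* type = 3, offset = 0x1234  ->  (3 & 0x1f) | (0x1234 << 13) = 0x2468003 */
            swp_entry_t e = __swp_entry(3, 0x1234);

            /* bits 12..5, including _PAGE_PRESENT (bit 9), stay zero */
            BUG_ON(__swp_type(e) != 3 || __swp_offset(e) != 0x1234);
    }
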
diff --git a/arch/arc/include/asm/pgtable-levels.h b/arch/arc/include/asm/pgtable-levels.h
new file mode 100644 (file)
index 0000000..8084ef2
--- /dev/null
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/*
+ * Helpers for implementing paging levels
+ */
+
+#ifndef _ASM_ARC_PGTABLE_LEVELS_H
+#define _ASM_ARC_PGTABLE_LEVELS_H
+
+#if CONFIG_PGTABLE_LEVELS == 2
+
+/*
+ * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS)
+ *
+ * [31]            32 bit virtual address              [0]
+ * -------------------------------------------------------
+ * |               | <---------- PGDIR_SHIFT ----------> |
+ * |               |                | <-- PAGE_SHIFT --> |
+ * -------------------------------------------------------
+ *       |                  |                |
+ *       |                  |                --> off in page frame
+ *       |                  ---> index into Page Table
+ *       ----> index into Page Directory
+ *
+ * Given the software walk, the vaddr split is arbitrarily set to 11:8:13.
+ * However, enabling super pages in a 2 level regime pegs PGDIR_SHIFT to the
+ * super page size.
+ */
+
+#if defined(CONFIG_ARC_HUGEPAGE_16M)
+#define PGDIR_SHIFT            24
+#elif defined(CONFIG_ARC_HUGEPAGE_2M)
+#define PGDIR_SHIFT            21
+#else
+/*
+ * No Super page case
+ * Default value provides 11:8:13 (8K), 10:10:12 (4K)
+ * The limit is imposed by pgtable_t being only PAGE_SIZE long
+ * (so a 4K page can only hold 1K entries, i.e. 10 bits)
+ */
+#ifdef CONFIG_ARC_PAGE_SIZE_4K
+#define PGDIR_SHIFT            22
+#else
+#define PGDIR_SHIFT            21
+#endif
+
+#endif
+
+#else /* CONFIG_PGTABLE_LEVELS != 2 */
+
+/*
+ * A default 3 level paging testing setup in software walked MMU
+ *   MMUv4 (8K page): <4> : <7> : <8> : <13>
+ * A default 4 level paging testing setup in software walked MMU
+ *   MMUv4 (8K page): <4> : <3> : <4> : <8> : <13>
+ */
+#define PGDIR_SHIFT            28
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SHIFT              25
+#endif
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SHIFT              21
+#endif
+
+#endif /* CONFIG_PGTABLE_LEVELS */
+
+#define PGDIR_SIZE             BIT(PGDIR_SHIFT)
+#define PGDIR_MASK             (~(PGDIR_SIZE - 1))
+#define PTRS_PER_PGD           BIT(32 - PGDIR_SHIFT)
+
+#if CONFIG_PGTABLE_LEVELS > 3
+#define PUD_SIZE               BIT(PUD_SHIFT)
+#define PUD_MASK               (~(PUD_SIZE - 1))
+#define PTRS_PER_PUD           BIT(PGDIR_SHIFT - PUD_SHIFT)
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+#define PMD_SIZE               BIT(PMD_SHIFT)
+#define PMD_MASK               (~(PMD_SIZE - 1))
+#define PTRS_PER_PMD           BIT(PUD_SHIFT - PMD_SHIFT)
+#endif
+
+#define PTRS_PER_PTE           BIT(PMD_SHIFT - PAGE_SHIFT)
+
+#ifndef __ASSEMBLY__
+
+#if CONFIG_PGTABLE_LEVELS > 3
+#include <asm-generic/pgtable-nop4d.h>
+#elif CONFIG_PGTABLE_LEVELS > 2
+#include <asm-generic/pgtable-nopud.h>
+#else
+#include <asm-generic/pgtable-nopmd.h>
+#endif
+
+/*
+ * 1st level paging: pgd
+ */
+#define pgd_index(addr)                ((addr) >> PGDIR_SHIFT)
+#define pgd_offset(mm, addr)   (((mm)->pgd) + pgd_index(addr))
+#define pgd_offset_k(addr)     pgd_offset(&init_mm, addr)
+#define pgd_ERROR(e) \
+       pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+#if CONFIG_PGTABLE_LEVELS > 3
+
+/* In 4 level paging, p4d_* macros work on pgd */
+#define p4d_none(x)            (!p4d_val(x))
+#define p4d_bad(x)             ((p4d_val(x) & ~PAGE_MASK))
+#define p4d_present(x)         (p4d_val(x))
+#define p4d_clear(xp)          do { p4d_val(*(xp)) = 0; } while (0)
+#define p4d_pgtable(p4d)       ((pud_t *)(p4d_val(p4d) & PAGE_MASK))
+#define p4d_page(p4d)          virt_to_page(p4d_pgtable(p4d))
+#define set_p4d(p4dp, p4d)     (*(p4dp) = p4d)
+
+/*
+ * 2nd level paging: pud
+ */
+#define pud_ERROR(e) \
+       pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+
+/*
+ * In 3 level paging, pud_* macros work on pgd
+ * In 4 level paging, pud_* macros work on pud
+ */
+#define pud_none(x)            (!pud_val(x))
+#define pud_bad(x)             ((pud_val(x) & ~PAGE_MASK))
+#define pud_present(x)         (pud_val(x))
+#define pud_clear(xp)          do { pud_val(*(xp)) = 0; } while (0)
+#define pud_pgtable(pud)       ((pmd_t *)(pud_val(pud) & PAGE_MASK))
+#define pud_page(pud)          virt_to_page(pud_pgtable(pud))
+#define set_pud(pudp, pud)     (*(pudp) = pud)
+
+/*
+ * 3rd level paging: pmd
+ */
+#define pmd_ERROR(e) \
+       pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+
+#define pmd_pfn(pmd)           ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot)      __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+#define mk_pmd(page,prot)      pfn_pmd(page_to_pfn(page),prot)
+
+#endif
+
+/*
+ * Due to the strange way generic pgtable level folding works, the pmd_* macros
+ *  - are valid even for 2 levels (which supposedly only has pgd - pte)
+ *  - behave differently for 2 vs. 3
+ * In 2  level paging        (pgd -> pte), pmd_* macros work on pgd
+ * In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd
+ */
+#define pmd_none(x)            (!pmd_val(x))
+#define pmd_bad(x)             ((pmd_val(x) & ~PAGE_MASK))
+#define pmd_present(x)         (pmd_val(x))
+#define pmd_clear(xp)          do { pmd_val(*(xp)) = 0; } while (0)
+#define pmd_page_vaddr(pmd)    (pmd_val(pmd) & PAGE_MASK)
+#define pmd_page(pmd)          virt_to_page(pmd_page_vaddr(pmd))
+#define set_pmd(pmdp, pmd)     (*(pmdp) = pmd)
+#define pmd_pgtable(pmd)       ((pgtable_t) pmd_page_vaddr(pmd))
+
+/*
+ * 4th level paging: pte
+ */
+#define pte_ERROR(e) \
+       pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+
+#define pte_none(x)            (!pte_val(x))
+#define pte_present(x)         (pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm,addr,ptep)        set_pte_at(mm, addr, ptep, __pte(0))
+#define pte_page(pte)          pfn_to_page(pte_pfn(pte))
+#define set_pte(ptep, pte)     ((*(ptep)) = (pte))
+#define pte_pfn(pte)           (pte_val(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn, prot)     __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
+#define mk_pte(page, prot)     pfn_pte(page_to_pfn(page), prot)
+
+#ifdef CONFIG_ISA_ARCV2
+#define pmd_leaf(x)            (pmd_val(x) & _PAGE_HW_SZ)
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
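
A worked check of the <4>:<3>:<4>:<8>:<13> split quoted above. It only holds under the assumed CONFIG_PGTABLE_LEVELS=4 build with 8K pages (PAGE_SHIFT 13), and assumes <linux/build_bug.h>; the numbers follow directly from the macros in this file:

    static inline void pgtable_split_check(void)
    {
            BUILD_BUG_ON(PTRS_PER_PGD != 16);       /* 2^(32 - PGDIR_SHIFT 28) */
            BUILD_BUG_ON(PTRS_PER_PUD != 8);        /* 2^(PGDIR_SHIFT 28 - PUD_SHIFT 25) */
            BUILD_BUG_ON(PTRS_PER_PMD != 16);       /* 2^(PUD_SHIFT 25 - PMD_SHIFT 21) */
            BUILD_BUG_ON(PTRS_PER_PTE != 256);      /* 2^(PMD_SHIFT 21 - PAGE_SHIFT 13) */
    }
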
index 320cc0a..9320b04 100644 (file)
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * vineetg: May 2011
- *  -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1.
- *     They are semantically the same although in different contexts
- *     VALID marks a TLB entry exists and it will only happen if PRESENT
- *  - Utilise some unused free bits to confine PTE flags to 12 bits
- *     This is a must for 4k pg-sz
- *
- * vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods
- *  -TLB Locking never really existed, except for initial specs
- *  -SILENT_xxx not needed for our port
- *  -Per my request, MMU V3 changes the layout of some of the bits
- *     to avoid a few shifts in TLB Miss handlers.
- *
- * vineetg: April 2010
- *  -PGD entry no longer contains any flags. If empty it is 0, otherwise has
- *   Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler
- *
- * vineetg: April 2010
- *  -Switched form 8:11:13 split for page table lookup to 11:8:13
- *  -this speeds up page table allocation itself as we now have to memset 1K
- *    instead of 8k per page table.
- * -TODO: Right now page table alloc is 8K and rest 7K is unused
- *    need to optimise it
- *
- * Amit Bhor, Sameer Dhavale: Codito Technologies 2004
  */
 
 #ifndef _ASM_ARC_PGTABLE_H
 #define _ASM_ARC_PGTABLE_H
 
 #include <linux/bits.h>
-#include <asm-generic/pgtable-nopmd.h>
-#include <asm/page.h>
-#include <asm/mmu.h>   /* to propagate CONFIG_ARC_MMU_VER <n> */
-
-/**************************************************************************
- * Page Table Flags
- *
- * ARC700 MMU only deals with softare managed TLB entries.
- * Page Tables are purely for Linux VM's consumption and the bits below are
- * suited to that (uniqueness). Hence some are not implemented in the TLB and
- * some have different value in TLB.
- * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in
- *      seperate PD0 and PD1, which combined forms a translation entry)
- *      while for PTE perspective, they are 8 and 9 respectively
- * with MMU v3: Most bits (except SHARED) represent the exact hardware pos
- *      (saves some bit shift ops in TLB Miss hdlrs)
- */
-
-#if (CONFIG_ARC_MMU_VER <= 2)
-
-#define _PAGE_ACCESSED      (1<<1)     /* Page is accessed (S) */
-#define _PAGE_CACHEABLE     (1<<2)     /* Page is cached (H) */
-#define _PAGE_EXECUTE       (1<<3)     /* Page has user execute perm (H) */
-#define _PAGE_WRITE         (1<<4)     /* Page has user write perm (H) */
-#define _PAGE_READ          (1<<5)     /* Page has user read perm (H) */
-#define _PAGE_DIRTY         (1<<6)     /* Page modified (dirty) (S) */
-#define _PAGE_SPECIAL       (1<<7)
-#define _PAGE_GLOBAL        (1<<8)     /* Page is global (H) */
-#define _PAGE_PRESENT       (1<<10)    /* TLB entry is valid (H) */
-
-#else  /* MMU v3 onwards */
-
-#define _PAGE_CACHEABLE     (1<<0)     /* Page is cached (H) */
-#define _PAGE_EXECUTE       (1<<1)     /* Page has user execute perm (H) */
-#define _PAGE_WRITE         (1<<2)     /* Page has user write perm (H) */
-#define _PAGE_READ          (1<<3)     /* Page has user read perm (H) */
-#define _PAGE_ACCESSED      (1<<4)     /* Page is accessed (S) */
-#define _PAGE_DIRTY         (1<<5)     /* Page modified (dirty) (S) */
-#define _PAGE_SPECIAL       (1<<6)
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_WTHRU         (1<<7)     /* Page cache mode write-thru (H) */
-#endif
-
-#define _PAGE_GLOBAL        (1<<8)     /* Page is global (H) */
-#define _PAGE_PRESENT       (1<<9)     /* TLB entry is valid (H) */
-
-#if (CONFIG_ARC_MMU_VER >= 4)
-#define _PAGE_HW_SZ         (1<<10)    /* Page Size indicator (H): 0 normal, 1 super */
-#endif
-
-#define _PAGE_SHARED_CODE   (1<<11)    /* Shared Code page with cmn vaddr
-                                          usable for shared TLB entries (H) */
-
-#define _PAGE_UNUSED_BIT    (1<<12)
-#endif
-
-/* vmalloc permissions */
-#define _K_PAGE_PERMS  (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
-                       _PAGE_GLOBAL | _PAGE_PRESENT)
-
-#ifndef CONFIG_ARC_CACHE_PAGES
-#undef _PAGE_CACHEABLE
-#define _PAGE_CACHEABLE 0
-#endif
 
-#ifndef _PAGE_HW_SZ
-#define _PAGE_HW_SZ    0
-#endif
-
-/* Defaults for every user page */
-#define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
-
-/* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
-                                                          _PAGE_SPECIAL)
-/* More Abbrevaited helpers */
-#define PAGE_U_NONE     __pgprot(___DEF)
-#define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
-#define PAGE_U_W_R      __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE)
-#define PAGE_U_X_R      __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE)
-#define PAGE_U_X_W_R    __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \
-                                                      _PAGE_EXECUTE)
-
-#define PAGE_SHARED    PAGE_U_W_R
-
-/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of
- * user vaddr space - visible in all addr spaces, but kernel mode only
- * Thus Global, all-kernel-access, no-user-access, cached
- */
-#define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE)
-
-/* ioremap */
-#define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
-
-/* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0                (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
-#define PTE_BITS_RWX           (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
-
-#define PTE_BITS_NON_RWX_IN_PD1        (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
-
-/**************************************************************************
- * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
- *
- * Certain cases have 1:1 mapping
- *  e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED
- *       which directly corresponds to  PAGE_U_X_R
- *
- * Other rules which cause the divergence from 1:1 mapping
- *
- *  1. Although ARC700 can do exclusive execute/write protection (meaning R
- *     can be tracked independet of X/W unlike some other CPUs), still to
- *     keep things consistent with other archs:
- *      -Write implies Read:   W => R
- *      -Execute implies Read: X => R
- *
- *  2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W
- *     This is to enable COW mechanism
- */
-       /* xwr */
-#define __P000  PAGE_U_NONE
-#define __P001  PAGE_U_R
-#define __P010  PAGE_U_R       /* Pvt-W => !W */
-#define __P011  PAGE_U_R       /* Pvt-W => !W */
-#define __P100  PAGE_U_X_R     /* X => R */
-#define __P101  PAGE_U_X_R
-#define __P110  PAGE_U_X_R     /* Pvt-W => !W and X => R */
-#define __P111  PAGE_U_X_R     /* Pvt-W => !W */
-
-#define __S000  PAGE_U_NONE
-#define __S001  PAGE_U_R
-#define __S010  PAGE_U_W_R     /* W => R */
-#define __S011  PAGE_U_W_R
-#define __S100  PAGE_U_X_R     /* X => R */
-#define __S101  PAGE_U_X_R
-#define __S110  PAGE_U_X_W_R   /* X => R */
-#define __S111  PAGE_U_X_W_R
-
-/****************************************************************
- * 2 tier (PGD:PTE) software page walker
- *
- * [31]                    32 bit virtual address              [0]
- * -------------------------------------------------------
- * |               | <------------ PGDIR_SHIFT ----------> |
- * |              |                                     |
- * | BITS_FOR_PGD  |  BITS_FOR_PTE  | <-- PAGE_SHIFT --> |
- * -------------------------------------------------------
- *       |                  |                |
- *       |                  |                --> off in page frame
- *       |                  ---> index into Page Table
- *       ----> index into Page Directory
- *
- * In a single page size configuration, only PAGE_SHIFT is fixed
- * So both PGD and PTE sizing can be tweaked
- *  e.g. 8K page (PAGE_SHIFT 13) can have
- *  - PGDIR_SHIFT 21  -> 11:8:13 address split
- *  - PGDIR_SHIFT 24  -> 8:11:13 address split
- *
- * If Super Page is configured, PGDIR_SHIFT becomes fixed too,
- * so the sizing flexibility is gone.
- */
-
-#if defined(CONFIG_ARC_HUGEPAGE_16M)
-#define PGDIR_SHIFT    24
-#elif defined(CONFIG_ARC_HUGEPAGE_2M)
-#define PGDIR_SHIFT    21
-#else
-/*
- * Only Normal page support so "hackable" (see comment above)
- * Default value provides 11:8:13 (8K), 11:9:12 (4K)
- */
-#define PGDIR_SHIFT    21
-#endif
-
-#define BITS_FOR_PTE   (PGDIR_SHIFT - PAGE_SHIFT)
-#define BITS_FOR_PGD   (32 - PGDIR_SHIFT)
-
-#define PGDIR_SIZE     BIT(PGDIR_SHIFT)        /* vaddr span, not PDG sz */
-#define PGDIR_MASK     (~(PGDIR_SIZE-1))
-
-#define        PTRS_PER_PTE    BIT(BITS_FOR_PTE)
-#define        PTRS_PER_PGD    BIT(BITS_FOR_PGD)
+#include <asm/pgtable-levels.h>
+#include <asm/pgtable-bits-arcv2.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
 
 /*
  * Number of entries a user land program use.
  */
 #define        USER_PTRS_PER_PGD       (TASK_SIZE / PGDIR_SIZE)
 
-
-/****************************************************************
- * Bucket load of VM Helpers
- */
-
 #ifndef __ASSEMBLY__
 
-#define pte_ERROR(e) \
-       pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pgd_ERROR(e) \
-       pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
-/* the zero page used for uninitialized and anonymous pages */
 extern char empty_zero_page[PAGE_SIZE];
 #define ZERO_PAGE(vaddr)       (virt_to_page(empty_zero_page))
 
-#define set_pte(pteptr, pteval)        ((*(pteptr)) = (pteval))
-#define set_pmd(pmdptr, pmdval)        (*(pmdptr) = pmdval)
-
-/* find the page descriptor of the Page Tbl ref by PMD entry */
-#define pmd_page(pmd)          virt_to_page(pmd_val(pmd) & PAGE_MASK)
-
-/* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */
-#define pmd_page_vaddr(pmd)    (pmd_val(pmd) & PAGE_MASK)
-
-/* In a 2 level sys, setup the PGD entry with PTE value */
-static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
-{
-       pmd_val(*pmdp) = (unsigned long)ptep;
-}
-
-#define pte_none(x)                    (!pte_val(x))
-#define pte_present(x)                 (pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm, addr, ptep)      set_pte_at(mm, addr, ptep, __pte(0))
-
-#define pmd_none(x)                    (!pmd_val(x))
-#define        pmd_bad(x)                      ((pmd_val(x) & ~PAGE_MASK))
-#define pmd_present(x)                 (pmd_val(x))
-#define pmd_leaf(x)                    (pmd_val(x) & _PAGE_HW_SZ)
-#define pmd_clear(xp)                  do { pmd_val(*(xp)) = 0; } while (0)
-
-#define pte_page(pte)          pfn_to_page(pte_pfn(pte))
-#define mk_pte(page, prot)     pfn_pte(page_to_pfn(page), prot)
-#define pfn_pte(pfn, prot)     __pte(__pfn_to_phys(pfn) | pgprot_val(prot))
-
-/* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
-#define pte_pfn(pte)           (pte_val(pte) >> PAGE_SHIFT)
-
-/* Zoo of pte_xxx function */
-#define pte_read(pte)          (pte_val(pte) & _PAGE_READ)
-#define pte_write(pte)         (pte_val(pte) & _PAGE_WRITE)
-#define pte_dirty(pte)         (pte_val(pte) & _PAGE_DIRTY)
-#define pte_young(pte)         (pte_val(pte) & _PAGE_ACCESSED)
-#define pte_special(pte)       (pte_val(pte) & _PAGE_SPECIAL)
-
-#define PTE_BIT_FUNC(fn, op) \
-       static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; }
-
-PTE_BIT_FUNC(mknotpresent,     &= ~(_PAGE_PRESENT));
-PTE_BIT_FUNC(wrprotect,        &= ~(_PAGE_WRITE));
-PTE_BIT_FUNC(mkwrite,  |= (_PAGE_WRITE));
-PTE_BIT_FUNC(mkclean,  &= ~(_PAGE_DIRTY));
-PTE_BIT_FUNC(mkdirty,  |= (_PAGE_DIRTY));
-PTE_BIT_FUNC(mkold,    &= ~(_PAGE_ACCESSED));
-PTE_BIT_FUNC(mkyoung,  |= (_PAGE_ACCESSED));
-PTE_BIT_FUNC(exprotect,        &= ~(_PAGE_EXECUTE));
-PTE_BIT_FUNC(mkexec,   |= (_PAGE_EXECUTE));
-PTE_BIT_FUNC(mkspecial,        |= (_PAGE_SPECIAL));
-PTE_BIT_FUNC(mkhuge,   |= (_PAGE_HW_SZ));
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-       return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
+extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
 
 /* Macro to mark a page protection as uncacheable */
 #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE))
 
-static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
-                             pte_t *ptep, pte_t pteval)
-{
-       set_pte(ptep, pteval);
-}
-
-/*
- * Macro to quickly access the PGD entry, utlising the fact that some
- * arch may cache the pointer to Page Directory of "current" task
- * in a MMU register
- *
- * Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply
- * becomes read a register
- *
- * ********CAUTION*******:
- * Kernel code might be dealing with some mm_struct of NON "current"
- * Thus use this macro only when you are certain that "current" is current
- * e.g. when dealing with signal frame setup code etc
- */
-#ifdef ARC_USE_SCRATCH_REG
-#define pgd_offset_fast(mm, addr)      \
-({                                     \
-       pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0);  \
-       pgd_base + pgd_index(addr);     \
-})
-#else
-#define pgd_offset_fast(mm, addr)      pgd_offset(mm, addr)
-#endif
-
 extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE);
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
-                     pte_t *ptep);
-
-/* Encode swap {type,off} tuple into PTE
- * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that
- * PAGE_PRESENT is zero in a PTE holding swap "identifier"
- */
-#define __swp_entry(type, off) ((swp_entry_t) { \
-                                       ((type) & 0x1f) | ((off) << 13) })
-
-/* Decode a PTE containing swap "identifier "into constituents */
-#define __swp_type(pte_lookalike)      (((pte_lookalike).val) & 0x1f)
-#define __swp_offset(pte_lookalike)    ((pte_lookalike).val >> 13)
-
-/* NOPs, to keep generic kernel happy */
-#define __pte_to_swp_entry(pte)        ((swp_entry_t) { pte_val(pte) })
-#define __swp_entry_to_pte(x)  ((pte_t) { (x).val })
-
-#define kern_addr_valid(addr)  (1)
-
-#define pmd_pgtable(pmd)       ((pgtable_t) pmd_page_vaddr(pmd))
-
-/*
- * remap a physical page `pfn' of size `size' with page protection `prot'
- * into virtual address `from'
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#include <asm/hugepage.h>
-#endif
 
 /* to cope with aliasing VIPT cache */
 #define HAVE_ARCH_UNMAPPED_AREA
index e4031ec..f28afcf 100644 (file)
@@ -93,7 +93,7 @@ extern unsigned int get_wchan(struct task_struct *p);
 #define VMALLOC_START  (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20))
 
 /* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */
-#define VMALLOC_SIZE   ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4)
+#define VMALLOC_SIZE   ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4)
 
 #define VMALLOC_END    (VMALLOC_START + VMALLOC_SIZE)
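
A hedged arithmetic check of the new definition, assuming the default CONFIG_ARC_KVADDR_SIZE of 256 (MB) and a 3/4-level build where PMD_SHIFT is 21:

    /*
     * VMALLOC_SIZE = (256 << 20) - 4 * (1 << 21) = 256 MB - 8 MB = 248 MB
     *
     * In a 2-level build, pgtable-nopmd folds PMD_SIZE onto PGDIR_SIZE, so the
     * result matches the old PGDIR_SIZE-based formula.
     */
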
 
index 01f8547..028a8cf 100644 (file)
@@ -2,8 +2,8 @@
 /*
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  */
-#ifndef __ASMARC_SETUP_H
-#define __ASMARC_SETUP_H
+#ifndef __ASM_ARC_SETUP_H
+#define __ASM_ARC_SETUP_H
 
 
 #include <linux/types.h>
@@ -34,4 +34,12 @@ long __init arc_get_mem_sz(void);
 #define IS_AVAIL2(v, s, cfg)   IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
 #define IS_AVAIL3(v, v2, s)    IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
 
+extern void arc_mmu_init(void);
+extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
+extern void read_decode_mmu_bcr(void);
+
+extern void arc_cache_init(void);
+extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+extern void read_decode_cache_bcr(void);
+
 #endif /* __ASMARC_SETUP_H */
index c5de400..d856491 100644 (file)
@@ -105,7 +105,6 @@ static inline const char *arc_platform_smp_cpuinfo(void)
 #include <asm/spinlock.h>
 
 extern arch_spinlock_t smp_atomic_ops_lock;
-extern arch_spinlock_t smp_bitops_lock;
 
 #define atomic_ops_lock(flags) do {            \
        local_irq_save(flags);                  \
@@ -117,24 +116,11 @@ extern arch_spinlock_t smp_bitops_lock;
        local_irq_restore(flags);               \
 } while (0)
 
-#define bitops_lock(flags)     do {            \
-       local_irq_save(flags);                  \
-       arch_spin_lock(&smp_bitops_lock);       \
-} while (0)
-
-#define bitops_unlock(flags) do {              \
-       arch_spin_unlock(&smp_bitops_lock);     \
-       local_irq_restore(flags);               \
-} while (0)
-
 #else /* !CONFIG_SMP */
 
 #define atomic_ops_lock(flags)         local_irq_save(flags)
 #define atomic_ops_unlock(flags)       local_irq_restore(flags)
 
-#define bitops_lock(flags)             local_irq_save(flags)
-#define bitops_unlock(flags)           local_irq_restore(flags)
-
 #endif /* !CONFIG_SMP */
 
 #endif /* !CONFIG_ARC_HAS_LLSC */
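
The atomic_ops_lock()/atomic_ops_unlock() pair kept above is the fallback used when the core has no LLSC: atomics degrade to irq-off plus (on SMP) a single global spinlock. A minimal sketch of how an atomic op is built on it, roughly mirroring the pattern in the new atomic-spinlock.h added by this series:

    static inline void arch_atomic_add(int i, atomic_t *v)
    {
            unsigned long flags;

            atomic_ops_lock(flags);         /* irq-off, plus smp_atomic_ops_lock on SMP */
            v->counter += i;
            atomic_ops_unlock(flags);
    }
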
diff --git a/arch/arc/include/asm/tlb-mmu1.h b/arch/arc/include/asm/tlb-mmu1.h
deleted file mode 100644 (file)
index a3083b3..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- */
-
-#ifndef __ASM_TLB_MMU_V1_H__
-#define __ASM_TLB_MMU_V1_H__
-
-#include <asm/mmu.h>
-
-#if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1)
-
-.macro TLB_WRITE_HEURISTICS
-
-#define JH_HACK1
-#undef JH_HACK2
-#undef JH_HACK3
-
-#ifdef JH_HACK3
-; Calculate set index for 2-way MMU
-; -avoiding use of GetIndex from MMU
-;   and its unpleasant LFSR pseudo-random sequence
-;
-; r1 = TLBPD0 from TLB_RELOAD above
-;
-; -- jh_ex_way_set not cleared on startup
-;    didn't want to change setup.c
-;    hence extra instruction to clean
-;
-; -- should be in cache since in same line
-;    as r0/r1 saves above
-;
-ld  r0,[jh_ex_way_sel]  ; victim pointer
-and r0,r0,1         ; clean
-xor.f   r0,r0,1         ; flip
-st  r0,[jh_ex_way_sel]  ; store back
-asr r0,r1,12        ; get set # <<1, note bit 12=R=0
-or.nz   r0,r0,1         ; set way bit
-and r0,r0,0xff      ; clean
-sr  r0,[ARC_REG_TLBINDEX]
-#endif
-
-#ifdef JH_HACK2
-; JH hack #2
-;  Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU
-;  Slower in thrash case (where it matters) because more code is executed
-;  Inefficient due to two-register paradigm of this miss handler
-;
-/* r1 = data TLBPD0 at this point */
-lr      r0,[eret]               /* instruction address */
-xor     r0,r0,r1                /* compare set #       */
-and.f   r0,r0,0x000fe000        /* 2-way MMU mask      */
-bne     88f                     /* not in same set - no need to probe */
-
-lr      r0,[eret]               /* instruction address */
-and     r0,r0,PAGE_MASK         /* VPN of instruction address */
-; lr  r1,[ARC_REG_TLBPD0]     /* Data VPN+ASID - already in r1 from TLB_RELOAD*/
-and     r1,r1,0xff              /* Data ASID */
-or      r0,r0,r1                /* Instruction address + Data ASID */
-
-lr      r1,[ARC_REG_TLBPD0]     /* save TLBPD0 containing data TLB*/
-sr      r0,[ARC_REG_TLBPD0]     /* write instruction address to TLBPD0 */
-sr      TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
-lr      r0,[ARC_REG_TLBINDEX]   /* r0 = index where instruction is, if at all */
-sr      r1,[ARC_REG_TLBPD0]     /* restore TLBPD0 */
-
-xor     r0,r0,1                 /* flip bottom bit of data index */
-b.d     89f
-sr      r0,[ARC_REG_TLBINDEX]   /* and put it back */
-88:
-sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
-89:
-#endif
-
-#ifdef JH_HACK1
-;
-; Always checks whether instruction will be kicked out by dtlb miss
-;
-mov_s   r3, r1                  ; save PD0 prepared by TLB_RELOAD in r3
-lr      r0,[eret]               /* instruction address */
-and     r0,r0,PAGE_MASK         /* VPN of instruction address */
-bmsk    r1,r3,7                 /* Data ASID, bits 7-0 */
-or_s    r0,r0,r1                /* Instruction address + Data ASID */
-
-sr      r0,[ARC_REG_TLBPD0]     /* write instruction address to TLBPD0 */
-sr      TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */
-lr      r0,[ARC_REG_TLBINDEX]   /* r0 = index where instruction is, if at all */
-sr      r3,[ARC_REG_TLBPD0]     /* restore TLBPD0 */
-
-sr      TLBGetIndex, [ARC_REG_TLBCOMMAND]
-lr      r1,[ARC_REG_TLBINDEX]   /* r1 = index where MMU wants to put data */
-cmp     r0,r1                   /* if no match on indices, go around */
-xor.eq  r1,r1,1                 /* flip bottom bit of data index */
-sr      r1,[ARC_REG_TLBINDEX]   /* and put it back */
-#endif
-
-.endm
-
-#endif
-
-#endif
index 12d5f12..a7e6a21 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/errno.h>
 #include <asm/arcregs.h>
 #include <asm/irqflags.h>
+#include <asm/mmu.h>
 
 ; A maximum number of supported interrupts in the core interrupt controller.
 ; This number is not equal to the maximum interrupt number (256) because
index 2cb8dfe..dd77a0c 100644 (file)
@@ -101,11 +101,8 @@ ENTRY(EV_MachineCheck)
        lr  r0, [efa]
        mov r1, sp
 
-       ; hardware auto-disables MMU, re-enable it to allow kernel vaddr
-       ; access for say stack unwinding of modules for crash dumps
-       lr      r3, [ARC_REG_PID]
-       or      r3, r3, MMU_ENABLE
-       sr      r3, [ARC_REG_PID]
+       ; MC exceptions auto-disable MMU, re-enable it for kernel vaddr access
+       ARC_MMU_REENABLE r3
 
        lsr     r3, r2, 8
        bmsk    r3, r3, 7
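
The removed lines show what the re-enable amounts to: set MMU_ENABLE in the PID aux register. As a sketch only (the patch keeps this in assembly via the ARC_MMU_REENABLE macro), the C-level equivalent would be:

    /* sketch: what the asm macro is expected to do */
    write_aux_reg(ARC_REG_PID, read_aux_reg(ARC_REG_PID) | MMU_ENABLE);
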
index a86641b..6885e42 100644 (file)
@@ -142,7 +142,7 @@ IRQCHIP_DECLARE(arc_intc, "snps,arc700-intc", init_onchip_IRQ);
  *    Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times.
  *    Here local_irq_enable( ) shd not re-enable lower priority interrupts
  * -If called from soft-ISR, it must re-enable all interrupts
- *    soft ISR are low prioity jobs which can be very slow, thus all IRQs
+ *    soft ISRs are low priority jobs which can be very slow, thus all IRQs
  *    must be enabled while they run.
  *    Now hardware context wise we may still be in L2 ISR (not done rtie)
  *    still we must re-enable both L1 and L2 IRQs
index db0e104..78e6d06 100644 (file)
 
 #ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 EXPORT_SYMBOL_GPL(smp_atomic_ops_lock);
-EXPORT_SYMBOL_GPL(smp_bitops_lock);
 #endif
 
 struct plat_smp_ops  __weak plat_smp_ops;
@@ -283,7 +281,7 @@ static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg)
        /*
         * Call the platform specific IPI kick function, but avoid if possible:
         * Only do so if there's no pending msg from other concurrent sender(s).
-        * Otherwise, recevier will see this msg as well when it takes the
+        * Otherwise, receiver will see this msg as well when it takes the
         * IPI corresponding to that msg. This is true, even if it is already in
         * IPI handler, because !@old means it has not yet dequeued the msg(s)
         * so @new msg can be a free-loader
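
A compact sketch of the "kick only if nothing was pending" logic that comment describes (illustrative, not the file's exact code; ipi_send_sketch is a made-up name, plat_smp_ops.ipi_send is the platform hook referenced above, atomic_fetch_or is the generic kernel helper):

    static void ipi_send_sketch(atomic_t *pending, int cpu, int msg)
    {
            int old = atomic_fetch_or(1 << msg, pending);   /* queue our msg bit */

            if (!old)       /* receiver had nothing queued, so it needs the doorbell */
                    plat_smp_ops.ipi_send(cpu);
    }
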
index 1b9576d..c376ff3 100644 (file)
@@ -149,7 +149,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
 #else
        /* On ARC, only Dward based unwinder works. fp based backtracing is
         * not possible (-fno-omit-frame-pointer) because of the way function
-        * prelogue is setup (callee regs saved and then fp set and not other
+        * prologue is set up (callee regs saved and then fp set and not the other
         * way around
         */
        pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
index a2fbea3..8aa1231 100644 (file)
@@ -205,93 +205,24 @@ slc_chk:
 #define OP_INV_IC      0x4
 
 /*
- *             I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
+ * Cache Flush programming model
  *
- * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
- * The orig Cache Management Module "CDU" only required paddr to invalidate a
- * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
- * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
- * the exact same line.
+ * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias.
+ * Programming model requires both paddr and vaddr irrespective of aliasing
+ * considerations:
+ *  - vaddr in {I,D}C_IV?L
+ *  - paddr in {I,D}C_PTAG
  *
- * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
- * paddr alone could not be used to correctly index the cache.
+ * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias.
+ * Programming model is different for aliasing vs. non-aliasing I$
+ *  - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L
+ *  - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$)
  *
- * ------------------
- * MMU v1/v2 (Fixed Page Size 8k)
- * ------------------
- * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geometry, 13 comes from
- * standard page size of 8k.
- * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
- * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
- * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
- * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU programming
- * model.
- *
- * 17:13 represented the max num of bits passable, actual bits needed were
- * fewer, based on the num-of-aliases possible.
- * -for 2 alias possibility, only bit 13 needed (32K cache)
- * -for 4 alias possibility, bits 14:13 needed (64K cache)
- *
- * ------------------
- * MMU v3
- * ------------------
- * This ver of MMU supports variable page sizes (1k-16k): although Linux will
- * only support 8k (default), 16k and 4k.
- * However from hardware perspective, smaller page sizes aggravate aliasing
- * meaning more vaddr bits needed to disambiguate the cache-line-op ;
- * the existing scheme of piggybacking won't work for certain configurations.
- * Two new registers IC_PTAG and DC_PTAG inttoduced.
- * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ *  - If PAE40 is enabled, independent of aliasing considerations, the higher
+ *    bits need to be written into PTAG_HI
  */
 
 static inline
-void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
-                         unsigned long sz, const int op, const int full_page)
-{
-       unsigned int aux_cmd;
-       int num_lines;
-
-       if (op == OP_INV_IC) {
-               aux_cmd = ARC_REG_IC_IVIL;
-       } else {
-               /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
-               aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
-       }
-
-       /* Ensure we properly floor/ceil the non-line aligned/sized requests
-        * and have @paddr - aligned to cache line and integral @num_lines.
-        * This however can be avoided for page sized since:
-        *  -@paddr will be cache-line aligned already (being page aligned)
-        *  -@sz will be integral multiple of line size (being page sized).
-        */
-       if (!full_page) {
-               sz += paddr & ~CACHE_LINE_MASK;
-               paddr &= CACHE_LINE_MASK;
-               vaddr &= CACHE_LINE_MASK;
-       }
-
-       num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-
-       /* MMUv2 and before: paddr contains stuffed vaddrs bits */
-       paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-
-       while (num_lines-- > 0) {
-               write_aux_reg(aux_cmd, paddr);
-               paddr += L1_CACHE_BYTES;
-       }
-}
-
-/*
- * For ARC700 MMUv3 I-cache and D-cache flushes
- *  - ARC700 programming model requires paddr and vaddr be passed in seperate
- *    AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the
- *    caches actually alias or not.
- * -  For HS38, only the aliasing I-cache configuration uses the PTAG reg
- *    (non aliasing I-cache version doesn't; while D-cache can't possibly alias)
- */
-static inline
 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
                          unsigned long sz, const int op, const int full_page)
 {
@@ -350,17 +281,6 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 #ifndef USE_RGN_FLSH
 
 /*
- * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
- * Here's how cache ops are implemented
- *
- *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
- *  - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
- *  - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
- *    respectively, similar to MMU v3 programming model, hence
- *    __cache_line_loop_v3() is used)
- *
- * If PAE40 is enabled, independent of aliasing considerations, the higher bits
- * needs to be written into PTAG_HI
  */
 static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
@@ -460,11 +380,9 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 
 #endif
 
-#if (CONFIG_ARC_MMU_VER < 3)
-#define __cache_line_loop      __cache_line_loop_v2
-#elif (CONFIG_ARC_MMU_VER == 3)
+#ifdef CONFIG_ARC_MMU_V3
 #define __cache_line_loop      __cache_line_loop_v3
-#elif (CONFIG_ARC_MMU_VER > 3)
+#else
 #define __cache_line_loop      __cache_line_loop_v4
 #endif
 
@@ -1123,7 +1041,7 @@ void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
        clear_page(to);
        clear_bit(PG_dc_clean, &page->flags);
 }
-
+EXPORT_SYMBOL(clear_user_page);
 
 /**********************************************************************
  * Explicit Cache flush request from user space via syscall
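
To make the rewritten programming-model comment concrete, a minimal MMUv3-style per-line loop could look as below: the physical address goes into the tag (PTAG) aux register, the virtual address into the index register that actually triggers the op. This is a sketch assuming the ARC_REG_DC_PTAG / ARC_REG_DC_IVDL register names (DC_IVDL appears in the removed v2 loop; DC_PTAG is the tag register the comment refers to); the real __cache_line_loop_v3() additionally optimizes the full-page case and handles PAE40 via PTAG_HI.

    static void dcache_inv_lines_sketch(phys_addr_t paddr, unsigned long vaddr,
                                        unsigned long sz)
    {
            int num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);

            while (num_lines-- > 0) {
                    write_aux_reg(ARC_REG_DC_PTAG, (u32)paddr);  /* physical tag */
                    write_aux_reg(ARC_REG_DC_IVDL, vaddr);       /* virtual index: triggers the op */
                    paddr += L1_CACHE_BYTES;
                    vaddr += L1_CACHE_BYTES;
            }
    }
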
index f5657cb..5787c26 100644 (file)
@@ -33,28 +33,34 @@ noinline static int handle_kernel_vaddr_fault(unsigned long address)
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;
 
-       pgd = pgd_offset_fast(current->active_mm, address);
+       pgd = pgd_offset(current->active_mm, address);
        pgd_k = pgd_offset_k(address);
 
-       if (!pgd_present(*pgd_k))
+       if (pgd_none(*pgd_k))
                goto bad_area;
+       if (!pgd_present(*pgd))
+               set_pgd(pgd, *pgd_k);
 
        p4d = p4d_offset(pgd, address);
        p4d_k = p4d_offset(pgd_k, address);
-       if (!p4d_present(*p4d_k))
+       if (p4d_none(*p4d_k))
                goto bad_area;
+       if (!p4d_present(*p4d))
+               set_p4d(p4d, *p4d_k);
 
        pud = pud_offset(p4d, address);
        pud_k = pud_offset(p4d_k, address);
-       if (!pud_present(*pud_k))
+       if (pud_none(*pud_k))
                goto bad_area;
+       if (!pud_present(*pud))
+               set_pud(pud, *pud_k);
 
        pmd = pmd_offset(pud, address);
        pmd_k = pmd_offset(pud_k, address);
-       if (!pmd_present(*pmd_k))
+       if (pmd_none(*pmd_k))
                goto bad_area;
-
-       set_pmd(pmd, *pmd_k);
+       if (!pmd_present(*pmd))
+               set_pmd(pmd, *pmd_k);
 
        /* XXX: create the TLB entry here */
        return 0;
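
Since this series takes ARC to up to four paging levels, walks like the one above follow the generic pgd -> p4d -> pud -> pmd -> pte chain. A self-contained sketch for a kernel virtual address, using only the standard generic accessors (lookup_kernel_pte is a hypothetical helper, not code from this patch):

    static pte_t *lookup_kernel_pte(unsigned long addr)
    {
            pgd_t *pgd = pgd_offset_k(addr);
            p4d_t *p4d;
            pud_t *pud;
            pmd_t *pmd;

            if (pgd_none(*pgd))
                    return NULL;
            p4d = p4d_offset(pgd, addr);
            if (p4d_none(*p4d))
                    return NULL;
            pud = pud_offset(p4d, addr);
            if (pud_none(*pud))
                    return NULL;
            pmd = pmd_offset(pud, addr);
            if (pmd_none(*pmd) || pmd_bad(*pmd))
                    return NULL;
            return pte_offset_kernel(pmd, addr);    /* leaf PTE */
    }

On builds with fewer levels the p4d/pud steps are folded away by the generic nop headers, so the same code works for 2, 3 or 4 levels.
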
index c083bf6..699ecf1 100644 (file)
@@ -189,6 +189,11 @@ void __init mem_init(void)
 {
        memblock_free_all();
        highmem_init();
+
+       BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE);
+       BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE);
+       BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE);
+       BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE);
 }
 
 #ifdef CONFIG_HIGHMEM
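
The BUILD_BUG_ONs added above encode one simple constraint: every table level must fit in a single page. As a worked example (values illustrative only), with an 8 KB PAGE_SIZE and 4-byte table entries a level may hold at most 8192 / 4 = 2048 entries, so any PTRS_PER_* configuration above that fails the build rather than silently overflowing the page.
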
index 95c649f..0ee75ac 100644 (file)
@@ -39,7 +39,8 @@ void __iomem *ioremap(phys_addr_t paddr, unsigned long size)
        if (arc_uncached_addr_space(paddr))
                return (void __iomem *)(u32)paddr;
 
-       return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE);
+       return ioremap_prot(paddr, size,
+                           pgprot_val(pgprot_noncached(PAGE_KERNEL)));
 }
 EXPORT_SYMBOL(ioremap);
 
index 9c7c682..5f71445 100644 (file)
@@ -1,51 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * TLB Management (flush/create/diagnostics) for ARC700
+ * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4
  *
  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
  *
- * vineetg: Aug 2011
- *  -Reintroduce duplicate PD fixup - some customer chips still have the issue
- *
- * vineetg: May 2011
- *  -No need to flush_cache_page( ) for each call to update_mmu_cache()
- *   some of the LMBench tests improved amazingly
- *      = page-fault thrice as fast (75 usec to 28 usec)
- *      = mmap twice as fast (9.6 msec to 4.6 msec),
- *      = fork (5.3 msec to 3.7 msec)
- *
- * vineetg: April 2011 :
- *  -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
- *      helps avoid a shift when preparing PD0 from PTE
- *
- * vineetg: April 2011 : Preparing for MMU V3
- *  -MMU v2/v3 BCRs decoded differently
- *  -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512
- *  -tlb_entry_erase( ) can be void
- *  -local_flush_tlb_range( ):
- *      = need not "ceil" @end
- *      = walks MMU only if range spans < 32 entries, as opposed to 256
- *
- * Vineetg: Sept 10th 2008
- *  -Changes related to MMU v2 (Rel 4.8)
- *
- * Vineetg: Aug 29th 2008
- *  -In TLB Flush operations (Metal Fix MMU) there is a explicit command to
- *    flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd,
- *    it fails. Thus need to load it with ANY valid value before invoking
- *    TLBIVUTLB cmd
- *
- * Vineetg: Aug 21th 2008:
- *  -Reduced the duration of IRQ lockouts in TLB Flush routines
- *  -Multiple copies of TLB erase code separated into a "single" function
- *  -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID
- *       in interrupt-safe region.
- *
- * Vineetg: April 23rd Bug #93131
- *    Problem: tlb_flush_kernel_range() doesn't do anything if the range to
- *              flush is more than the size of TLB itself.
- *
- * Rahul Trivedi : Codito Technologies 2004
  */
 
 #include <linux/module.h>
 #include <asm/mmu_context.h>
 #include <asm/mmu.h>
 
-/*                     Need for ARC MMU v2
- *
- * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc.
- * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages
- * map into same set, there would be contention for the 2 ways causing severe
- * Thrashing.
- *
- * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has
- * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways.
- * Given this, the thrashing problem should never happen because once the 3
- * J-TLB entries are created (even though 3rd will knock out one of the prev
- * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy
- *
- * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs.
- * This is a simple design for keeping them in sync. So what do we do?
- * The solution which James came up was pretty neat. It utilised the assoc
- * of uTLBs by not invalidating always but only when absolutely necessary.
- *
- * - Existing TLB commands work as before
- * - New command (TLBWriteNI) for TLB write without clearing uTLBs
- * - New command (TLBIVUTLB) to invalidate uTLBs.
- *
- * The uTLBs need only be invalidated when pages are being removed from the
- * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB
- * as a result of a miss, the removed entry is still allowed to exist in the
- * uTLBs as it is still valid and present in the OS page table. This allows the
- * full associativity of the uTLBs to hide the limited associativity of the main
- * TLB.
- *
- * During a miss handler, the new "TLBWriteNI" command is used to load
- * entries without clearing the uTLBs.
- *
- * When the OS page table is updated, TLB entries that may be associated with a
- * removed page are removed (flushed) from the TLB using TLBWrite. In this
- * circumstance, the uTLBs must also be cleared. This is done by using the
- * existing TLBWrite command. An explicit IVUTLB is also required for those
- * corner cases when TLBWrite was not executed at all because the corresp
- * J-TLB entry got evicted/replaced.
- */
-
-
 /* A copy of the ASID from the PID reg is kept in asid_cache */
 DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 
@@ -120,32 +37,10 @@ static inline void __tlb_entry_erase(void)
 
 static void utlb_invalidate(void)
 {
-#if (CONFIG_ARC_MMU_VER >= 2)
-
-#if (CONFIG_ARC_MMU_VER == 2)
-       /* MMU v2 introduced the uTLB Flush command.
-        * There was however an obscure hardware bug, where uTLB flush would
-        * fail when a prior probe for J-TLB (both totally unrelated) would
-        * return lkup err - because the entry didn't exist in MMU.
-        * The Workaround was to set Index reg with some valid value, prior to
-        * flush. This was fixed in MMU v3
-        */
-       unsigned int idx;
-
-       /* make sure INDEX Reg is valid */
-       idx = read_aux_reg(ARC_REG_TLBINDEX);
-
-       /* If not write some dummy val */
-       if (unlikely(idx & TLB_LKUP_ERR))
-               write_aux_reg(ARC_REG_TLBINDEX, 0xa);
-#endif
-
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB);
-#endif
-
 }
 
-#if (CONFIG_ARC_MMU_VER < 4)
+#ifdef CONFIG_ARC_MMU_V3
 
 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
 {
@@ -176,7 +71,7 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
        }
 }
 
-static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
+static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
 {
        unsigned int idx;
 
@@ -206,7 +101,7 @@ static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
-#else  /* CONFIG_ARC_MMU_VER >= 4) */
+#else  /* MMUv4 */
 
 static void tlb_entry_erase(unsigned int vaddr_n_asid)
 {
@@ -214,13 +109,16 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
 }
 
-static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
+static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1)
 {
        write_aux_reg(ARC_REG_TLBPD0, pd0);
-       write_aux_reg(ARC_REG_TLBPD1, pd1);
 
-       if (is_pae40_enabled())
+       if (!is_pae40_enabled()) {
+               write_aux_reg(ARC_REG_TLBPD1, pd1);
+       } else {
+               write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF);
                write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+       }
 
        write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
 }
@@ -496,7 +394,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
        unsigned long flags;
        unsigned int asid_or_sasid, rwx;
        unsigned long pd0;
-       pte_t pd1;
+       phys_addr_t pd1;
 
        /*
         * create_tlb() assumes that current->mm == vma->mm, since
@@ -505,7 +403,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
         *
         * Removing the assumption involves
         * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg.
-        * -Fix the TLB paranoid debug code to not trigger false negatives.
         * -More importantly it makes this handler inconsistent with fast-path
         *  TLB Refill handler which always deals with "current"
         *
@@ -528,8 +425,6 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 
        local_irq_save(flags);
 
-       tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr);
-
        vaddr &= PAGE_MASK;
 
        /* update this PTE credentials */
@@ -639,43 +534,6 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
        update_mmu_cache(vma, addr, &pte);
 }
 
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-                               pgtable_t pgtable)
-{
-       struct list_head *lh = (struct list_head *) pgtable;
-
-       assert_spin_locked(&mm->page_table_lock);
-
-       /* FIFO */
-       if (!pmd_huge_pte(mm, pmdp))
-               INIT_LIST_HEAD(lh);
-       else
-               list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
-       pmd_huge_pte(mm, pmdp) = pgtable;
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
-       struct list_head *lh;
-       pgtable_t pgtable;
-
-       assert_spin_locked(&mm->page_table_lock);
-
-       pgtable = pmd_huge_pte(mm, pmdp);
-       lh = (struct list_head *) pgtable;
-       if (list_empty(lh))
-               pmd_huge_pte(mm, pmdp) = NULL;
-       else {
-               pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
-               list_del(lh);
-       }
-
-       pte_val(pgtable[0]) = 0;
-       pte_val(pgtable[1]) = 0;
-
-       return pgtable;
-}
-
 void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
                               unsigned long end)
 {
@@ -706,14 +564,6 @@ void read_decode_mmu_bcr(void)
 {
        struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
        unsigned int tmp;
-       struct bcr_mmu_1_2 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8;
-#else
-               unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8;
-#endif
-       } *mmu2;
-
        struct bcr_mmu_3 {
 #ifdef CONFIG_CPU_BIG_ENDIAN
        unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
@@ -738,23 +588,14 @@ void read_decode_mmu_bcr(void)
        tmp = read_aux_reg(ARC_REG_MMU_BCR);
        mmu->ver = (tmp >> 24);
 
-       if (is_isa_arcompact()) {
-               if (mmu->ver <= 2) {
-                       mmu2 = (struct bcr_mmu_1_2 *)&tmp;
-                       mmu->pg_sz_k = TO_KB(0x2000);
-                       mmu->sets = 1 << mmu2->sets;
-                       mmu->ways = 1 << mmu2->ways;
-                       mmu->u_dtlb = mmu2->u_dtlb;
-                       mmu->u_itlb = mmu2->u_itlb;
-               } else {
-                       mmu3 = (struct bcr_mmu_3 *)&tmp;
-                       mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
-                       mmu->sets = 1 << mmu3->sets;
-                       mmu->ways = 1 << mmu3->ways;
-                       mmu->u_dtlb = mmu3->u_dtlb;
-                       mmu->u_itlb = mmu3->u_itlb;
-                       mmu->sasid = mmu3->sasid;
-               }
+       if (is_isa_arcompact() && mmu->ver == 3) {
+               mmu3 = (struct bcr_mmu_3 *)&tmp;
+               mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
+               mmu->sets = 1 << mmu3->sets;
+               mmu->ways = 1 << mmu3->ways;
+               mmu->u_dtlb = mmu3->u_dtlb;
+               mmu->u_itlb = mmu3->u_itlb;
+               mmu->sasid = mmu3->sasid;
        } else {
                mmu4 = (struct bcr_mmu_4 *)&tmp;
                mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
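
As a worked example of the decode above (the field values are made up): a BCR reporting pg_sz = 4, sets = 7, ways = 2 decodes to an 8 KB page (1 << (4 - 1) KB), 128 sets and 4 ways, i.e. a 512-entry JTLB.
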
@@ -780,8 +621,8 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
                          IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
        n += scnprintf(buf + n, len - n,
-                     "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
-                      p_mmu->ver, p_mmu->pg_sz_k, super_pg,
+                     "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
+                      p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
                       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
                       p_mmu->u_dtlb, p_mmu->u_itlb,
                       IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
@@ -815,22 +656,17 @@ void arc_mmu_init(void)
 
        /*
         * Ensure that MMU features assumed by kernel exist in hardware.
-        * For older ARC700 cpus, it has to be exact match, since the MMU
-        * revisions were not backwards compatible (MMUv3 TLB layout changed
-        * so even if kernel for v2 didn't use any new cmds of v3, it would
-        * still not work.
-        * For HS cpus, MMUv4 was baseline and v5 is backwards compatible
-        * (will run older software).
+        *  - For older ARC700 cpus, only v3 supported
+        *  - For HS cpus, v4 was baseline and v5 is backwards compatible
+        *    (will run older software).
         */
-       if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER)
+       if (is_isa_arcompact() && mmu->ver == 3)
                compat = 1;
-       else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER)
+       else if (is_isa_arcv2() && mmu->ver >= 4)
                compat = 1;
 
-       if (!compat) {
-               panic("MMU ver %d doesn't match kernel built for %d...\n",
-                     mmu->ver, CONFIG_ARC_MMU_VER);
-       }
+       if (!compat)
+               panic("MMU ver %d doesn't match what this kernel supports\n", mmu->ver);
 
        if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
                panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
@@ -843,14 +679,11 @@ void arc_mmu_init(void)
        if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
                panic("Hardware doesn't support PAE40\n");
 
-       /* Enable the MMU */
-       write_aux_reg(ARC_REG_PID, MMU_ENABLE);
+       /* Enable the MMU with ASID 0 */
+       mmu_setup_asid(NULL, 0);
 
-       /* In smp we use this reg for interrupt 1 scratch */
-#ifdef ARC_USE_SCRATCH_REG
-       /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */
-       write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir);
-#endif
+       /* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */
+       mmu_setup_pgd(NULL, swapper_pg_dir);
 
        if (pae40_exist_but_not_enab())
                write_aux_reg(ARC_REG_TLBPD1HI, 0);
@@ -945,40 +778,3 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
 
        local_irq_restore(flags);
 }
-
-/***********************************************************************
- * Diagnostic Routines
- *  -Called from Low Level TLB Handlers if things don;t look good
- **********************************************************************/
-
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-
-/*
- * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS
- * don't match
- */
-void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path)
-{
-       pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
-              is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid);
-
-       __asm__ __volatile__("flag 1");
-}
-
-void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
-{
-       unsigned int mmu_asid;
-
-       mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff;
-
-       /*
-        * At the time of a TLB miss/installation
-        *   - HW version needs to match SW version
-        *   - SW needs to have a valid ASID
-        */
-       if (addr < 0x70000000 &&
-           ((mm_asid == MM_CTXT_NO_ASID) ||
-             (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK))))
-               print_asid_mismatch(mm_asid, mmu_asid, 0);
-}
-#endif
index 062fae4..e054780 100644 (file)
@@ -39,7 +39,6 @@
 #include <asm/arcregs.h>
 #include <asm/cache.h>
 #include <asm/processor.h>
-#include <asm/tlb-mmu1.h>
 
 #ifdef CONFIG_ISA_ARCOMPACT
 ;-----------------------------------------------------------------
@@ -94,11 +93,6 @@ ex_saved_reg1:
        st_s  r1, [r0, 4]
        st_s  r2, [r0, 8]
        st_s  r3, [r0, 12]
-
-       ; VERIFY if the ASID in MMU-PID Reg is same as
-       ; one in Linux data structures
-
-       tlb_paranoid_check_asm
 .endm
 
 .macro TLBMISS_RESTORE_REGS
@@ -148,53 +142,16 @@ ex_saved_reg1:
 #endif
 
 ;============================================================================
-;  Troubleshooting Stuff
+;TLB Miss handling Code
 ;============================================================================
 
-; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid
-; When Creating TLB Entries, instead of doing 3 dependent loads from memory,
-; we use the MMU PID Reg to get current ASID.
-; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
-; So we try to detect this in TLB Mis shandler
-
-.macro tlb_paranoid_check_asm
-
-#ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-
-       GET_CURR_TASK_ON_CPU  r3
-       ld r0, [r3, TASK_ACT_MM]
-       ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
-       breq r0, 0, 55f ; Error if no ASID allocated
-
-       lr r1, [ARC_REG_PID]
-       and r1, r1, 0xFF
-
-       and r2, r0, 0xFF        ; MMU PID bits only for comparison
-       breq r1, r2, 5f
-
-55:
-       ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
-       lr  r2, [erstatus]
-       bbit0 r2, STATUS_U_BIT, 5f
-
-       ; We sure are in troubled waters, Flag the error, but to do so
-       ; need to switch to kernel mode stack to call error routine
-       GET_TSK_STACK_BASE   r3, sp
-
-       ; Call printk to shoutout aloud
-       mov r2, 1
-       j print_asid_mismatch
-
-5:     ; ASIDs match so proceed normally
-       nop
-
+#ifndef PMD_SHIFT
+#define PMD_SHIFT PUD_SHIFT
 #endif
 
-.endm
-
-;============================================================================
-;TLB Miss handling Code
-;============================================================================
+#ifndef PUD_SHIFT
+#define PUD_SHIFT PGDIR_SHIFT
+#endif
 
 ;-----------------------------------------------------------------------------
 ; This macro does the page-table lookup for the faulting address.
@@ -203,7 +160,7 @@ ex_saved_reg1:
 
        lr  r2, [efa]
 
-#ifdef ARC_USE_SCRATCH_REG
+#ifdef CONFIG_ISA_ARCV2
        lr  r1, [ARC_REG_SCRATCH_DATA0] ; current pgd
 #else
        GET_CURR_TASK_ON_CPU  r1
@@ -216,6 +173,24 @@ ex_saved_reg1:
        tst     r3, r3
        bz      do_slow_path_pf         ; if no Page Table, do page fault
 
+#if CONFIG_PGTABLE_LEVELS > 3
+       lsr     r0, r2, PUD_SHIFT       ; Bits for indexing into PUD
+       and     r0, r0, (PTRS_PER_PUD - 1)
+       ld.as   r1, [r3, r0]            ; PUD entry
+       tst     r1, r1
+       bz      do_slow_path_pf
+       mov     r3, r1
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 2
+       lsr     r0, r2, PMD_SHIFT       ; Bits for indexing into PMD
+       and     r0, r0, (PTRS_PER_PMD - 1)
+       ld.as   r1, [r3, r0]            ; PMD entry
+       tst     r1, r1
+       bz      do_slow_path_pf
+       mov     r3, r1
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        and.f   0, r3, _PAGE_HW_SZ      ; Is this Huge PMD (thp)
        add2.nz r1, r1, r0
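
When the kernel is built with fewer than 3 or 4 levels, the CONFIG_PGTABLE_LEVELS guarded blocks above compile out entirely, and the PMD_SHIFT/PUD_SHIFT fallbacks defined just before this macro keep any remaining references well defined by folding each shift onto the next level up. For instance (numbers illustrative), if PGDIR_SHIFT were 21 on a 2-level configuration, PUD_SHIFT and PMD_SHIFT would both evaluate to 21, so the walk reduces to pgd then pte exactly as before.
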
@@ -279,7 +254,7 @@ ex_saved_reg1:
 ; Commit the TLB entry into MMU
 
 .macro COMMIT_ENTRY_TO_MMU
-#if (CONFIG_ARC_MMU_VER < 4)
+#ifdef CONFIG_ARC_MMU_V3
 
        /* Get free TLB slot: Set = computed from vaddr, way = random */
        sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]
@@ -375,13 +350,6 @@ ENTRY(EV_TLBMissD)
 
        CONV_PTE_TO_TLB
 
-#if (CONFIG_ARC_MMU_VER == 1)
-       ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
-       ; memcpy where 3 parties contend for 2 ways, ensuing a livelock.
-       ; But only for old MMU or one with Metal Fix
-       TLB_WRITE_HEURISTICS
-#endif
-
        COMMIT_ENTRY_TO_MMU
        TLBMISS_RESTORE_REGS
 EV_TLBMissD_fast_ret:  ; additional label for VDK OS-kit instrumentation