arch/arm/lib/div64.S

   1 /* SPDX-License-Identifier: GPL-2.0-only */
   2 /*
   3  *  linux/arch/arm/lib/div64.S
   4  *
   5  *  Optimized computation of 64-bit dividend / 32-bit divisor
   6  *
   7  *  Author:     Nicolas Pitre
   8  *  Created:    Oct 5, 2003
   9  *  Copyright:  Monta Vista Software, Inc.
  10  */
  11
  12 #include <linux/linkage.h>
  13 #include <asm/assembler.h>
  14 #include <asm/unwind.h>
  15
  16 #ifdef __ARMEB__        /* big-endian build: high words sit in the even registers */
  17 #define xh r0           /* dividend, high word */
  18 #define xl r1           /* dividend, low word */
  19 #define yh r2           /* result, high word */
  20 #define yl r3           /* result, low word */
  21 #else                   /* otherwise: low words sit in the even registers */
  22 #define xl r0           /* dividend, low word */
  23 #define xh r1           /* dividend, high word */
  24 #define yl r2           /* result, low word */
  25 #define yh r3           /* result, high word */
  26 #endif
  27
  28 /*
  29  * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
  30  *
  31  * Note: Calling convention is totally non standard for optimal code.
  32  *       This is meant to be used by do_div() from include/asm/div64.h only.
  33  *
  34  * Input parameters:
  35  *      xh-xl   = dividend (clobbered)
  36  *      r4      = divisor (preserved)
  37  *
  38  * Output values:
  39  *      yh-yl   = result
  40  *      xh      = remainder
  41  *
  42  * Clobbered regs: xl, ip, condition flags (plus lr when the divisor is 0)
  43  */
  44
  45 ENTRY(__do_div64)
  46 UNWIND(.fnstart)
  47
             @ Overview: unsigned division of the 64-bit value xh:xl by the
             @ 32-bit value in r4.  The quotient is built in yh:yl and the
             @ remainder is left in xh.  Divisors 0, 1 and powers of two take
             @ dedicated paths (9:, 8:).  Everything else is shift-and-subtract,
             @ first for the upper quotient word (2:), then the lower one (4:).
  48         @ Test for easy paths first.
  49         subs    ip, r4, #1              @ ip = divisor - 1, flags updated
  50         bls     9f                      @ divisor is 0 or 1
  51         tst     ip, r4                  @ divisor & (divisor - 1)
  52         beq     8f                      @ divisor is power of 2
  53
  54         @ See if we need to handle upper 32-bit result.
  55         cmp     xh, r4
  56         mov     yh, #0                  @ flags from the cmp are kept
  57         blo     3f                      @ xh < divisor: upper word stays 0
  58
  59         @ Align divisor with upper part of dividend.
  60         @ The aligned divisor is stored in yl preserving the original.
  61         @ The bit position is stored in ip.
  62
  63 #if __LINUX_ARM_ARCH__ >= 5
  64
  65         clz     yl, r4
  66         clz     ip, xh
  67         sub     yl, yl, ip              @ yl = clz(divisor) - clz(xh)
  68         mov     ip, #1
  69         mov     ip, ip, lsl yl          @ ip = quotient bit for that shift
  70         mov     yl, r4, lsl yl          @ yl = divisor shifted by that amount
  71
  72 #else
  73
  74         mov     yl, r4
  75         mov     ip, #1
  76 1:      cmp     yl, #0x80000000         @ stop once bit 31 of yl is set
  77         cmpcc   yl, xh                  @ ... or once yl >= xh
  78         movcc   yl, yl, lsl #1
  79         movcc   ip, ip, lsl #1
  80         bcc     1b
  81
  82 #endif
  83
  84         @ The division loop for needed upper bit positions.
  85         @ Break out early if dividend reaches 0.
  86 2:      cmp     xh, yl
  87         orrcs   yh, yh, ip              @ xh >= yl: set this quotient bit
  88         subscs  xh, xh, yl              @ ... and subtract; Z set if xh is now 0
  89         movsne  ip, ip, lsr #1          @ next bit; Z set once ip runs out
  90         mov     yl, yl, lsr #1          @ does not touch the flags
  91         bne     2b
  92
  93         @ See if we need to handle lower 32-bit result.
  94 3:      cmp     xh, #0
  95         mov     yl, #0
  96         cmpeq   xl, r4
  97         movlo   xh, xl                  @ lo only if xh == 0 and xl < divisor:
  98         retlo   lr                      @ remainder is xl, yl stays 0
  99
 100         @ The division loop for lower bit positions.
 101         @ Here we shift remainder bits leftwards rather than moving the
 102         @ divisor for comparisons, considering the carry-out bit as well.
 103         mov     ip, #0x80000000         @ start at quotient bit 31
 104 4:      movs    xl, xl, lsl #1          @ top bit of xl goes to carry
 105         adcs    xh, xh, xh              @ xh = 2 * xh + carry, C = bit shifted out
 106         beq     6f                      @ low 32 bits of the remainder are 0
 107         cmpcc   xh, r4                  @ no carry out: C = (xh >= divisor)
 108 5:      orrcs   yl, yl, ip              @ C set: set this quotient bit
 109         subcs   xh, xh, r4              @ ... and subtract the divisor
 110         movs    ip, ip, lsr #1
 111         bne     4b
 112         ret     lr
 113
 114         @ The top part of remainder became zero.  If carry is set
 115         @ (the 33rd bit) this is a false positive so resume the loop.
 116         @ Otherwise, if lower part is also null then we are done.
 117 6:      bcs     5b
 118         cmp     xl, #0
 119         reteq   lr                      @ xh is 0 here: remainder 0
 120
 121         @ We still have remainder bits in the low part.  Bring them up.
 122
 123 #if __LINUX_ARM_ARCH__ >= 5
 124
 125         clz     xh, xl                  @ we know xh is zero here so...
 126         add     xh, xh, #1              @ shift count that moves the top set bit out of xl
 127         mov     xl, xl, lsl xh
 128         mov     ip, ip, lsr xh          @ skip the same number of quotient bits
 129
 130 #else
 131
 132 7:      movs    xl, xl, lsl #1
 133         mov     ip, ip, lsr #1
 134         bcc     7b                      @ loop until a set bit is shifted out
 135
 136 #endif
 137
 138         @ Current remainder is now 1.  It is worthless to compare with
 139         @ divisor at this point since divisor can not be smaller than 3 here.
 140         @ If possible, branch for another shift in the division loop.
 141         @ If no bit position left then we are done.
 142         movs    ip, ip, lsr #1
 143         mov     xh, #1                  @ flags from the movs are kept
 144         bne     4b
 145         ret     lr
 146
 147 8:      @ Division by a power of 2: determine what that divisor order is
 148         @ then simply shift values around
 149
 150 #if __LINUX_ARM_ARCH__ >= 5
 151
 152         clz     ip, r4
 153         rsb     ip, ip, #31             @ ip = 31 - clz(divisor) = order
 154
 155 #else
 156
 157         mov     yl, r4                  @ narrow down the set bit: 16, 8, 4 ...
 158         cmp     r4, #(1 << 16)
 159         mov     ip, #0
 160         movhs   yl, yl, lsr #16
 161         movhs   ip, #16
 162
 163         cmp     yl, #(1 << 8)
 164         movhs   yl, yl, lsr #8
 165         addhs   ip, ip, #8
 166
 167         cmp     yl, #(1 << 4)
 168         movhs   yl, yl, lsr #4
 169         addhs   ip, ip, #4
 170
 171         cmp     yl, #(1 << 2)           @ yl is 1, 2, 4 or 8 here
 172         addhi   ip, ip, #3              @ yl == 8
 173         addls   ip, ip, yl, lsr #1      @ yl == 1, 2, 4 -> add 0, 1, 2
 174
 175 #endif
 176
 177         mov     yh, xh, lsr ip
 178         mov     yl, xl, lsr ip
 179         rsb     ip, ip, #32             @ ip = 32 - order
             @ The bits shifted out of xh become the top bits of yl.
 180  ARM(   orr     yl, yl, xh, lsl ip      )
 181  THUMB( lsl     xh, xh, ip              )
 182  THUMB( orr     yl, yl, xh              )
 183         mov     xh, xl, lsl ip          @ remainder = the low order bits of xl
 184         mov     xh, xh, lsr ip
 185         ret     lr
 186
 187         @ eq -> division by 1: obvious enough...
 188 9:      moveq   yl, xl
 189         moveq   yh, xh
 190         moveq   xh, #0
 191         reteq   lr
             @ ne -> divisor is 0: falls through to Ldiv0_64 below.
 192 UNWIND(.fnend)
 193
 194 UNWIND(.fnstart)
 195 UNWIND(.pad #4)
 196 UNWIND(.save {lr})
 197 Ldiv0_64:
 198         @ Division by 0:
             @ call __div0, then return a zero quotient and a zero remainder.
 199         str     lr, [sp, #-8]!          @ save lr, moving sp down by 8
 200         bl      __div0
 201
 202         @ as wrong as it could be...
 203         mov     yl, #0
 204         mov     yh, #0
 205         mov     xh, #0
 206         ldr     pc, [sp], #8            @ return through the saved lr, sp back up by 8
 207
 208 UNWIND(.fnend)
 209 ENDPROC(__do_div64)