arch/powerpc/kernel/vecemu.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Routines to emulate some Altivec/VMX instructions, specifically
   4  * those that can trap when given denormalized operands in Java mode.
   5  */
   6 #include <linux/kernel.h>
   7 #include <linux/errno.h>
   8 #include <linux/sched.h>
   9 #include <asm/ptrace.h>
  10 #include <asm/processor.h>
  11 #include <asm/switch_to.h>
  12 #include <linux/uaccess.h>
  13 #include <asm/inst.h>
  14
  15 /* Functions in vector.S */
  16 extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
  17 extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
  18 extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  19 extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  20 extern void vrefp(vector128 *dst, vector128 *src);
  21 extern void vrsqrtefp(vector128 *dst, vector128 *src);
  22 extern void vexptep(vector128 *dst, vector128 *src);
  23
  24 static unsigned int exp2s[8] = {
  25         0x800000,
  26         0x8b95c2,
  27         0x9837f0,
  28         0xa5fed7,
  29         0xb504f3,
  30         0xc5672a,
  31         0xd744fd,
  32         0xeac0c7
  33 };
  34
  35 /*
  36  * Computes an estimate of 2^x.  The `s' argument is the 32-bit
  37  * single-precision floating-point representation of x.
  38  */
  39 static unsigned int eexp2(unsigned int s)
  40 {
  41         int exp, pwr;
  42         unsigned int mant, frac;
  43
  44         /* extract exponent field from input */
  45         exp = ((s >> 23) & 0xff) - 127;
  46         if (exp > 7) {
  47                 /* check for NaN input */
  48                 if (exp == 128 && (s & 0x7fffff) != 0)
  49                         return s | 0x400000;    /* return QNaN */
  50                 /* 2^-big = 0, 2^+big = +Inf */
  51                 return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
  52         }
  53         if (exp < -23)
  54                 return 0x3f800000;      /* 1.0 */
  55
  56         /* convert to fixed point integer in 9.23 representation */
  57         pwr = (s & 0x7fffff) | 0x800000;
  58         if (exp > 0)
  59                 pwr <<= exp;
  60         else
  61                 pwr >>= -exp;
  62         if (s & 0x80000000)
  63                 pwr = -pwr;
  64
  65         /* extract integer part, which becomes exponent part of result */
  66         exp = (pwr >> 23) + 126;
  67         if (exp >= 254)
  68                 return 0x7f800000;
  69         if (exp < -23)
  70                 return 0;
  71
  72         /* table lookup on top 3 bits of fraction to get mantissa */
  73         mant = exp2s[(pwr >> 20) & 7];
  74
  75         /* linear interpolation using remaining 20 bits of fraction */
  76         asm("mulhwu %0,%1,%2" : "=r" (frac)
  77             : "r" (pwr << 12), "r" (0x172b83ff));
  78         asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
  79         mant += frac;
  80
  81         if (exp >= 0)
  82                 return mant + (exp << 23);
  83
  84         /* denormalized result */
  85         exp = -exp;
  86         mant += 1 << (exp - 1);
  87         return mant >> exp;
  88 }
  89
  90 /*
  91  * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
  92  * single-precision floating-point representation of x.
  93  */
  94 static unsigned int elog2(unsigned int s)
  95 {
  96         int exp, mant, lz, frac;
  97
  98         exp = s & 0x7f800000;
  99         mant = s & 0x7fffff;
 100         if (exp == 0x7f800000) {        /* Inf or NaN */
 101                 if (mant != 0)
 102                         s |= 0x400000;  /* turn NaN into QNaN */
 103                 return s;
 104         }
 105         if ((exp | mant) == 0)          /* +0 or -0 */
 106                 return 0xff800000;      /* return -Inf */
 107
 108         if (exp == 0) {
 109                 /* denormalized */
 110                 asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
 111                 mant <<= lz - 8;
 112                 exp = (-118 - lz) << 23;
 113         } else {
 114                 mant |= 0x800000;
 115                 exp -= 127 << 23;
 116         }
 117
 118         if (mant >= 0xb504f3) {                         /* 2^0.5 * 2^23 */
 119                 exp |= 0x400000;                        /* 0.5 * 2^23 */
 120                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 121                     : "r" (mant), "r" (0xb504f334));    /* 2^-0.5 * 2^32 */
 122         }
 123         if (mant >= 0x9837f0) {                         /* 2^0.25 * 2^23 */
 124                 exp |= 0x200000;                        /* 0.25 * 2^23 */
 125                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 126                     : "r" (mant), "r" (0xd744fccb));    /* 2^-0.25 * 2^32 */
 127         }
 128         if (mant >= 0x8b95c2) {                         /* 2^0.125 * 2^23 */
 129                 exp |= 0x100000;                        /* 0.125 * 2^23 */
 130                 asm("mulhwu %0,%1,%2" : "=r" (mant)
 131                     : "r" (mant), "r" (0xeac0c6e8));    /* 2^-0.125 * 2^32 */
 132         }
 133         if (mant > 0x800000) {                          /* 1.0 * 2^23 */
 134                 /* calculate (mant - 1) * 1.381097463 */
 135                 /* 1.381097463 == 0.125 / (2^0.125 - 1) */
 136                 asm("mulhwu %0,%1,%2" : "=r" (frac)
 137                     : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
 138                 exp += frac;
 139         }
 140         s = exp & 0x80000000;
 141         if (exp != 0) {
 142                 if (s)
 143                         exp = -exp;
 144                 asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
 145                 lz = 8 - lz;
 146                 if (lz > 0)
 147                         exp >>= lz;
 148                 else if (lz < 0)
 149                         exp <<= -lz;
 150                 s += ((lz + 126) << 23) + exp;
 151         }
 152         return s;
 153 }
 154
 155 #define VSCR_SAT        1
 156
 157 static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
 158 {
 159         int exp, mant;
 160
 161         exp = (x >> 23) & 0xff;
 162         mant = x & 0x7fffff;
 163         if (exp == 255 && mant != 0)
 164                 return 0;               /* NaN -> 0 */
 165         exp = exp - 127 + scale;
 166         if (exp < 0)
 167                 return 0;               /* round towards zero */
 168         if (exp >= 31) {
 169                 /* saturate, unless the result would be -2^31 */
 170                 if (x + (scale << 23) != 0xcf000000)
 171                         *vscrp |= VSCR_SAT;
 172                 return (x & 0x80000000)? 0x80000000: 0x7fffffff;
 173         }
 174         mant |= 0x800000;
 175         mant = (mant << 7) >> (30 - exp);
 176         return (x & 0x80000000)? -mant: mant;
 177 }
 178
 179 static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
 180 {
 181         int exp;
 182         unsigned int mant;
 183
 184         exp = (x >> 23) & 0xff;
 185         mant = x & 0x7fffff;
 186         if (exp == 255 && mant != 0)
 187                 return 0;               /* NaN -> 0 */
 188         exp = exp - 127 + scale;
 189         if (exp < 0)
 190                 return 0;               /* round towards zero */
 191         if (x & 0x80000000) {
 192                 /* negative => saturate to 0 */
 193                 *vscrp |= VSCR_SAT;
 194                 return 0;
 195         }
 196         if (exp >= 32) {
 197                 /* saturate */
 198                 *vscrp |= VSCR_SAT;
 199                 return 0xffffffff;
 200         }
 201         mant |= 0x800000;
 202         mant = (mant << 8) >> (31 - exp);
 203         return mant;
 204 }
 205
 206 /* Round to floating integer, towards 0 */
 207 static unsigned int rfiz(unsigned int x)
 208 {
 209         int exp;
 210
 211         exp = ((x >> 23) & 0xff) - 127;
 212         if (exp == 128 && (x & 0x7fffff) != 0)
 213                 return x | 0x400000;    /* NaN -> make it a QNaN */
 214         if (exp >= 23)
 215                 return x;               /* it's an integer already (or Inf) */
 216         if (exp < 0)
 217                 return x & 0x80000000;  /* |x| < 1.0 rounds to 0 */
 218         return x & ~(0x7fffff >> exp);
 219 }
 220
 221 /* Round to floating integer, towards +/- Inf */
 222 static unsigned int rfii(unsigned int x)
 223 {
 224         int exp, mask;
 225
 226         exp = ((x >> 23) & 0xff) - 127;
 227         if (exp == 128 && (x & 0x7fffff) != 0)
 228                 return x | 0x400000;    /* NaN -> make it a QNaN */
 229         if (exp >= 23)
 230                 return x;               /* it's an integer already (or Inf) */
 231         if ((x & 0x7fffffff) == 0)
 232                 return x;               /* +/-0 -> +/-0 */
 233         if (exp < 0)
 234                 /* 0 < |x| < 1.0 rounds to +/- 1.0 */
 235                 return (x & 0x80000000) | 0x3f800000;
 236         mask = 0x7fffff >> exp;
 237         /* mantissa overflows into exponent - that's OK,
 238            it can't overflow into the sign bit */
 239         return (x + mask) & ~mask;
 240 }
 241
 242 /* Round to floating integer, to nearest */
 243 static unsigned int rfin(unsigned int x)
 244 {
 245         int exp, half;
 246
 247         exp = ((x >> 23) & 0xff) - 127;
 248         if (exp == 128 && (x & 0x7fffff) != 0)
 249                 return x | 0x400000;    /* NaN -> make it a QNaN */
 250         if (exp >= 23)
 251                 return x;               /* it's an integer already (or Inf) */
 252         if (exp < -1)
 253                 return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
 254         if (exp == -1)
 255                 /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
 256                 return (x & 0x80000000) | 0x3f800000;
 257         half = 0x400000 >> exp;
 258         /* add 0.5 to the magnitude and chop off the fraction bits */
 259         return (x + half) & ~(0x7fffff >> exp);
 260 }
 261
 262 int emulate_altivec(struct pt_regs *regs)
 263 {
 264         struct ppc_inst instr;
 265         unsigned int i, word;
 266         unsigned int va, vb, vc, vd;
 267         vector128 *vrs;
 268
 269         if (get_user_instr(instr, (void __user *)regs->nip))
 270                 return -EFAULT;
 271
 272         word = ppc_inst_val(instr);
 273         if (ppc_inst_primary_opcode(instr) != 4)
 274                 return -EINVAL;         /* not an altivec instruction */
 275         vd = (word >> 21) & 0x1f;
 276         va = (word >> 16) & 0x1f;
 277         vb = (word >> 11) & 0x1f;
 278         vc = (word >> 6) & 0x1f;
 279
 280         vrs = current->thread.vr_state.vr;
 281         switch (word & 0x3f) {
 282         case 10:
 283                 switch (vc) {
 284                 case 0: /* vaddfp */
 285                         vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
 286                         break;
 287                 case 1: /* vsubfp */
 288                         vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
 289                         break;
 290                 case 4: /* vrefp */
 291                         vrefp(&vrs[vd], &vrs[vb]);
 292                         break;
 293                 case 5: /* vrsqrtefp */
 294                         vrsqrtefp(&vrs[vd], &vrs[vb]);
 295                         break;
 296                 case 6: /* vexptefp */
 297                         for (i = 0; i < 4; ++i)
 298                                 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
 299                         break;
 300                 case 7: /* vlogefp */
 301                         for (i = 0; i < 4; ++i)
 302                                 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
 303                         break;
 304                 case 8:         /* vrfin */
 305                         for (i = 0; i < 4; ++i)
 306                                 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
 307                         break;
 308                 case 9:         /* vrfiz */
 309                         for (i = 0; i < 4; ++i)
 310                                 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
 311                         break;
 312                 case 10:        /* vrfip */
 313                         for (i = 0; i < 4; ++i) {
 314                                 u32 x = vrs[vb].u[i];
 315                                 x = (x & 0x80000000)? rfiz(x): rfii(x);
 316                                 vrs[vd].u[i] = x;
 317                         }
 318                         break;
 319                 case 11:        /* vrfim */
 320                         for (i = 0; i < 4; ++i) {
 321                                 u32 x = vrs[vb].u[i];
 322                                 x = (x & 0x80000000)? rfii(x): rfiz(x);
 323                                 vrs[vd].u[i] = x;
 324                         }
 325                         break;
 326                 case 14:        /* vctuxs */
 327                         for (i = 0; i < 4; ++i)
 328                                 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
 329                                         &current->thread.vr_state.vscr.u[3]);
 330                         break;
 331                 case 15:        /* vctsxs */
 332                         for (i = 0; i < 4; ++i)
 333                                 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
 334                                         &current->thread.vr_state.vscr.u[3]);
 335                         break;
 336                 default:
 337                         return -EINVAL;
 338                 }
 339                 break;
 340         case 46:        /* vmaddfp */
 341                 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 342                 break;
 343         case 47:        /* vnmsubfp */
 344                 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 345                 break;
 346         default:
 347                 return -EINVAL;
 348         }
 349
 350         return 0;
 351 }