lib/raid6/recov_avx512.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_avx512(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) &&
                boot_cpu_has(X86_FEATURE_AVX) &&
                boot_cpu_has(X86_FEATURE_AVX512F) &&
                boot_cpu_has(X86_FEATURE_AVX512BW) &&
                boot_cpu_has(X86_FEATURE_AVX512VL) &&
                boot_cpu_has(X86_FEATURE_AVX512DQ);
}
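
/*
 * A note on the checks above: the zmm forms of vpshufb, vpsraw and
 * vpbroadcastb used below require AVX512BW, and vpandq/vpxorq need
 * AVX512F; the remaining flags presumably act as a conservative gate so
 * this code only runs on parts with a complete AVX-512 implementation.
 */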

static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
                                     int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */

        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dp;
        ptrs[failb]   = dq;
        ptrs[disks-2] = p;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
                raid6_gfexp[failb]]];
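
        /*
         * Background (this mirrors the generic lib/raid6/recov.c): with
         * data disks a < b lost, let Pd/Qd be the xor of the real P/Q
         * with the syndromes just computed over the surviving data.
         * Over GF(2^8), writing + for xor and g for the generator:
         *
         *      Pd = Da + Db
         *      Qd = g^a*Da + g^b*Db
         *
         * which solves to
         *
         *      Db = Pd*(g^(b-a) + 1)^-1 + Qd*(g^a + g^b)^-1
         *      Da = Db + Pd
         *
         * so pbmul is the multiply-by-(g^(b-a) + 1)^-1 table
         * (raid6_gfexi) and qmul the multiply-by-(g^a + g^b)^-1 table.
         */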

        kernel_fpu_begin();

        /* zmm7 = x0f in every byte: mask to split each byte into nibbles */
        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm9\n\t"
                             "vmovdqa64 %2, %%zmm0\n\t"
                             "vmovdqa64 %3, %%zmm8\n\t"
                             "vpxorq %4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %5, %%zmm9, %%zmm9\n\t"
                             "vpxorq %6, %%zmm0, %%zmm0\n\t"
                             "vpxorq %7, %%zmm8, %%zmm8"
                             :
                             : "m" (q[0]), "m" (q[64]), "m" (p[0]),
                               "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
                               "m" (dp[0]), "m" (dp[64]));

                /*
                 * 1 = dq[0]  ^ q[0]
                 * 9 = dq[64] ^ q[64]
                 * 0 = dp[0]  ^ p[0]
                 * 8 = dp[64] ^ p[64]
                 */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpsraw $4, %%zmm9, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );

                /*
                 * 5 = qx[0]
                 * 15 = qx[64]
                 */
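
                /*
                 * The multiplies above use the usual vpshufb table trick:
                 * a GF(2^8) product by a constant is linear over the two
                 * nibbles of each byte, so each 16-byte lane looks up the
                 * low nibble in qmul[0..15] and the high nibble in
                 * qmul[16..31], then xors the two partial products.
                 */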

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpsraw $4, %%zmm8, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm12, %%zmm13, %%zmm13"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));

                /*
                 * 1  = pbmul[px[0]]
                 * 13 = pbmul[px[64]]
                 */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm15, %%zmm13, %%zmm13"
                             :
                             : );

                /*
                 * 1 = db = DQ
                 * 13 = db[64] = DQ[64]
                 */
                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm13, %1\n\t"
                             "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vpxorq %%zmm13, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]));

                asm volatile("vmovdqa64 %%zmm0, %0\n\t"
                             "vmovdqa64 %%zmm8, %1"
                             :
                             : "m" (dp[0]), "m" (dp[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dp += 128;
                dq += 128;
#else
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm0\n\t"
                             "vpxorq %2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %3, %%zmm0, %%zmm0"
                             :
                             : "m" (*q), "m" (*p), "m" (*dq), "m" (*dp));

                /* 1 = dq ^ q;  0 = dp ^ p */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                /*
                 * 1 = dq ^ q
                 * 3 = (dq ^ q) >> 4
                 */
                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );

                /* 5 = qx */

                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));

                asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1"
                             :
                             : );

                /* 1 = pbmul[px] */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             /* 1 = db = DQ */
                             "vmovdqa64 %%zmm1, %0\n\t"
                             :
                             : "m" (dq[0]));

                asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vmovdqa64 %%zmm0, %0"
                             :
                             : "m" (dp[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}
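
/*
 * For reference, the scalar equivalent of the loop above (this is the
 * generic loop from lib/raid6/recov.c, which the AVX-512 code vectorizes
 * byte for byte; there pbmul/qmul are the full 256-entry raid6_gfmul
 * tables rather than the 16+16 nibble tables used here, and px/qx/db
 * are u8):
 *
 *      while (bytes--) {
 *              px    = *p ^ *dp;
 *              qx    = qmul[*q ^ *dq];
 *              *dq++ = db = pbmul[px] ^ qx;    // reconstructed Db
 *              *dp++ = db ^ px;                // reconstructed Da
 *              p++; q++;
 *      }
 */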

static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
                                     void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */

        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila]   = dq;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
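
        /*
         * Background (again mirroring lib/raid6/recov.c): here data disk
         * a is lost along with P.  With + denoting xor, the Q delta
         * Qd = Q + Qsyn equals g^a*Da in GF(2^8), so Da = Qd*(g^a)^-1
         * and qmul is the multiply-by-g^-a table; P is then rebuilt as
         * Psyn + Da.
         */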

        kernel_fpu_begin();

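        /* zmm7 = x0f in every byte: mask to split each byte into nibbles */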
        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vmovdqa64 %1, %%zmm8\n\t"
                             "vpxorq %2, %%zmm3, %%zmm3\n\t"
                             "vpxorq %3, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
                               "m" (q[64]));

                /*
                 * 3 = q[0] ^ dq[0]
                 * 8 = q[64] ^ dq[64]
                 */
                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vmovapd %%zmm0, %%zmm13\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vmovapd %%zmm1, %%zmm14"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpsraw $4, %%zmm8, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm13, %%zmm14, %%zmm14"
                             :
                             : );

                /*
                 * 1  = qmul[q[0]  ^ dq[0]]
                 * 14 = qmul[q[64] ^ dq[64]]
                 */
                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vmovdqa64 %1, %%zmm12\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
                             "vpxorq %%zmm14, %%zmm12, %%zmm12"
                             :
                             : "m" (p[0]), "m" (p[64]));

                /*
                 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
                 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
                 */

                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm14, %1\n\t"
                             "vmovdqa64 %%zmm2, %2\n\t"
                             "vmovdqa64 %%zmm12, %3"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
                               "m" (p[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dq += 128;
#else
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vpxorq %1, %%zmm3, %%zmm3"
                             :
                             : "m" (dq[0]), "m" (q[0]));

                /* 3 = q ^ dq */

                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));

                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1"
                             :
                             : );

                /* 1 = qmul[q ^ dq] */

                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2"
                             :
                             : "m" (p[0]));

                /* 2 = p ^ qmul[q ^ dq] */

                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm2, %1"
                             :
                             : "m" (dq[0]), "m" (p[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}
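
/*
 * Again for reference, the scalar equivalent from lib/raid6/recov.c
 * (with qmul there being the full 256-entry multiply-by-g^-a table):
 *
 *      while (bytes--) {
 *              *p++ ^= *dq = qmul[*q ^ *dq];
 *              q++; dq++;
 *      }
 */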

const struct raid6_recov_calls raid6_recov_avx512 = {
        .data2 = raid6_2data_recov_avx512,
        .datap = raid6_datap_recov_avx512,
        .valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
        .name = "avx512x2",
#else
        .name = "avx512x1",
#endif
        .priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif