Merge tag 'spi-fix-v5.14-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/brooni...
[linux-2.6-microblaze.git] / drivers / gpu / drm / savage / savage_state.c
1 /* savage_state.c -- State and drawing support for Savage
2  *
3  * Copyright 2004  Felix Kuehling
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sub license,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial portions
15  * of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
22  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25
26 #include <linux/slab.h>
27 #include <linux/uaccess.h>
28
29 #include <drm/drm_device.h>
30 #include <drm/drm_file.h>
31 #include <drm/drm_print.h>
32 #include <drm/savage_drm.h>
33
34 #include "savage_drv.h"
35
36 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
37                                const struct drm_clip_rect * pbox)
38 {
39         uint32_t scstart = dev_priv->state.s3d.new_scstart;
40         uint32_t scend = dev_priv->state.s3d.new_scend;
41         scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
42             ((uint32_t) pbox->x1 & 0x000007ff) |
43             (((uint32_t) pbox->y1 << 16) & 0x07ff0000);
44         scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
45             (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
46             ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
47         if (scstart != dev_priv->state.s3d.scstart ||
48             scend != dev_priv->state.s3d.scend) {
49                 DMA_LOCALS;
50                 BEGIN_DMA(4);
51                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
52                 DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
53                 DMA_WRITE(scstart);
54                 DMA_WRITE(scend);
55                 dev_priv->state.s3d.scstart = scstart;
56                 dev_priv->state.s3d.scend = scend;
57                 dev_priv->waiting = 1;
58                 DMA_COMMIT();
59         }
60 }
61
62 void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
63                               const struct drm_clip_rect * pbox)
64 {
65         uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
66         uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
67         drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
68             ((uint32_t) pbox->x1 & 0x000007ff) |
69             (((uint32_t) pbox->y1 << 12) & 0x00fff000);
70         drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
71             (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
72             ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
73         if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
74             drawctrl1 != dev_priv->state.s4.drawctrl1) {
75                 DMA_LOCALS;
76                 BEGIN_DMA(4);
77                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
78                 DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
79                 DMA_WRITE(drawctrl0);
80                 DMA_WRITE(drawctrl1);
81                 dev_priv->state.s4.drawctrl0 = drawctrl0;
82                 dev_priv->state.s4.drawctrl1 = drawctrl1;
83                 dev_priv->waiting = 1;
84                 DMA_COMMIT();
85         }
86 }
87
88 static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
89                                  uint32_t addr)
90 {
91         if ((addr & 6) != 2) {  /* reserved bits */
92                 DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
93                 return -EINVAL;
94         }
95         if (!(addr & 1)) {      /* local */
96                 addr &= ~7;
97                 if (addr < dev_priv->texture_offset ||
98                     addr >= dev_priv->texture_offset + dev_priv->texture_size) {
99                         DRM_ERROR
100                             ("bad texAddr%d %08x (local addr out of range)\n",
101                              unit, addr);
102                         return -EINVAL;
103                 }
104         } else {                /* AGP */
105                 if (!dev_priv->agp_textures) {
106                         DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
107                                   unit, addr);
108                         return -EINVAL;
109                 }
110                 addr &= ~7;
111                 if (addr < dev_priv->agp_textures->offset ||
112                     addr >= (dev_priv->agp_textures->offset +
113                              dev_priv->agp_textures->size)) {
114                         DRM_ERROR
115                             ("bad texAddr%d %08x (AGP addr out of range)\n",
116                              unit, addr);
117                         return -EINVAL;
118                 }
119         }
120         return 0;
121 }
122
/*
 * Helpers for the savage_verify_state_* functions.
 *
 * Both macros expect dev_priv, start, count and regs to be visible in
 * the scope where they expand. They act only when register number
 * "reg" lies inside [start, start + count):
 *
 *   SAVE_STATE      copies the new register value into state.<where>.
 *   SAVE_STATE_MASK replaces only the bits selected by "mask" in
 *                   state.<where>, leaving the other bits untouched.
 *
 * Each one is wrapped in do { } while (0) so it behaves as a single
 * statement and cannot capture an "else" that follows the call site.
 */
#define SAVE_STATE(reg,where) do {                              \
        if (start <= (reg) && start + count > (reg))            \
                dev_priv->state.where = regs[(reg) - start];    \
} while (0)
#define SAVE_STATE_MASK(reg,where,mask) do {                    \
        if (start <= (reg) && start + count > (reg)) {          \
                uint32_t tmp;                                   \
                tmp = regs[(reg) - start];                      \
                dev_priv->state.where = (tmp & (mask)) |        \
                        (dev_priv->state.where & ~(mask));      \
        }                                                       \
} while (0)
134
135 static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
136                                    unsigned int start, unsigned int count,
137                                    const uint32_t *regs)
138 {
139         if (start < SAVAGE_TEXPALADDR_S3D ||
140             start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
141                 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
142                           start, start + count - 1);
143                 return -EINVAL;
144         }
145
146         SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
147                         ~SAVAGE_SCISSOR_MASK_S3D);
148         SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
149                         ~SAVAGE_SCISSOR_MASK_S3D);
150
151         /* if any texture regs were changed ... */
152         if (start <= SAVAGE_TEXCTRL_S3D &&
153             start + count > SAVAGE_TEXPALADDR_S3D) {
154                 /* ... check texture state */
155                 SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
156                 SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
157                 if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
158                         return savage_verify_texaddr(dev_priv, 0,
159                                                 dev_priv->state.s3d.texaddr);
160         }
161
162         return 0;
163 }
164
165 static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
166                                   unsigned int start, unsigned int count,
167                                   const uint32_t *regs)
168 {
169         int ret = 0;
170
171         if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
172             start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
173                 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
174                           start, start + count - 1);
175                 return -EINVAL;
176         }
177
178         SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
179                         ~SAVAGE_SCISSOR_MASK_S4);
180         SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
181                         ~SAVAGE_SCISSOR_MASK_S4);
182
183         /* if any texture regs were changed ... */
184         if (start <= SAVAGE_TEXDESCR_S4 &&
185             start + count > SAVAGE_TEXPALADDR_S4) {
186                 /* ... check texture state */
187                 SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
188                 SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
189                 SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
190                 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
191                         ret |= savage_verify_texaddr(dev_priv, 0,
192                                                 dev_priv->state.s4.texaddr0);
193                 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
194                         ret |= savage_verify_texaddr(dev_priv, 1,
195                                                 dev_priv->state.s4.texaddr1);
196         }
197
198         return ret;
199 }
200
201 #undef SAVE_STATE
202 #undef SAVE_STATE_MASK
203
204 static int savage_dispatch_state(drm_savage_private_t * dev_priv,
205                                  const drm_savage_cmd_header_t * cmd_header,
206                                  const uint32_t *regs)
207 {
208         unsigned int count = cmd_header->state.count;
209         unsigned int start = cmd_header->state.start;
210         unsigned int count2 = 0;
211         unsigned int bci_size;
212         int ret;
213         DMA_LOCALS;
214
215         if (!count)
216                 return 0;
217
218         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
219                 ret = savage_verify_state_s3d(dev_priv, start, count, regs);
220                 if (ret != 0)
221                         return ret;
222                 /* scissor regs are emitted in savage_dispatch_draw */
223                 if (start < SAVAGE_SCSTART_S3D) {
224                         if (start + count > SAVAGE_SCEND_S3D + 1)
225                                 count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
226                         if (start + count > SAVAGE_SCSTART_S3D)
227                                 count = SAVAGE_SCSTART_S3D - start;
228                 } else if (start <= SAVAGE_SCEND_S3D) {
229                         if (start + count > SAVAGE_SCEND_S3D + 1) {
230                                 count -= SAVAGE_SCEND_S3D + 1 - start;
231                                 start = SAVAGE_SCEND_S3D + 1;
232                         } else
233                                 return 0;
234                 }
235         } else {
236                 ret = savage_verify_state_s4(dev_priv, start, count, regs);
237                 if (ret != 0)
238                         return ret;
239                 /* scissor regs are emitted in savage_dispatch_draw */
240                 if (start < SAVAGE_DRAWCTRL0_S4) {
241                         if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
242                                 count2 = count -
243                                          (SAVAGE_DRAWCTRL1_S4 + 1 - start);
244                         if (start + count > SAVAGE_DRAWCTRL0_S4)
245                                 count = SAVAGE_DRAWCTRL0_S4 - start;
246                 } else if (start <= SAVAGE_DRAWCTRL1_S4) {
247                         if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
248                                 count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
249                                 start = SAVAGE_DRAWCTRL1_S4 + 1;
250                         } else
251                                 return 0;
252                 }
253         }
254
255         bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
256
257         if (cmd_header->state.global) {
258                 BEGIN_DMA(bci_size + 1);
259                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
260                 dev_priv->waiting = 1;
261         } else {
262                 BEGIN_DMA(bci_size);
263         }
264
265         do {
266                 while (count > 0) {
267                         unsigned int n = count < 255 ? count : 255;
268                         DMA_SET_REGISTERS(start, n);
269                         DMA_COPY(regs, n);
270                         count -= n;
271                         start += n;
272                         regs += n;
273                 }
274                 start += 2;
275                 regs += 2;
276                 count = count2;
277                 count2 = 0;
278         } while (count);
279
280         DMA_COMMIT();
281
282         return 0;
283 }
284
285 static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
286                                     const drm_savage_cmd_header_t * cmd_header,
287                                     const struct drm_buf * dmabuf)
288 {
289         unsigned char reorder = 0;
290         unsigned int prim = cmd_header->prim.prim;
291         unsigned int skip = cmd_header->prim.skip;
292         unsigned int n = cmd_header->prim.count;
293         unsigned int start = cmd_header->prim.start;
294         unsigned int i;
295         BCI_LOCALS;
296
297         if (!dmabuf) {
298                 DRM_ERROR("called without dma buffers!\n");
299                 return -EINVAL;
300         }
301
302         if (!n)
303                 return 0;
304
305         switch (prim) {
306         case SAVAGE_PRIM_TRILIST_201:
307                 reorder = 1;
308                 prim = SAVAGE_PRIM_TRILIST;
309                 fallthrough;
310         case SAVAGE_PRIM_TRILIST:
311                 if (n % 3 != 0) {
312                         DRM_ERROR("wrong number of vertices %u in TRILIST\n",
313                                   n);
314                         return -EINVAL;
315                 }
316                 break;
317         case SAVAGE_PRIM_TRISTRIP:
318         case SAVAGE_PRIM_TRIFAN:
319                 if (n < 3) {
320                         DRM_ERROR
321                             ("wrong number of vertices %u in TRIFAN/STRIP\n",
322                              n);
323                         return -EINVAL;
324                 }
325                 break;
326         default:
327                 DRM_ERROR("invalid primitive type %u\n", prim);
328                 return -EINVAL;
329         }
330
331         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
332                 if (skip != 0) {
333                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
334                         return -EINVAL;
335                 }
336         } else {
337                 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
338                     (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
339                     (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
340                 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
341                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
342                         return -EINVAL;
343                 }
344                 if (reorder) {
345                         DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
346                         return -EINVAL;
347                 }
348         }
349
350         if (start + n > dmabuf->total / 32) {
351                 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
352                           start, start + n - 1, dmabuf->total / 32);
353                 return -EINVAL;
354         }
355
356         /* Vertex DMA doesn't work with command DMA at the same time,
357          * so we use BCI_... to submit commands here. Flush buffered
358          * faked DMA first. */
359         DMA_FLUSH();
360
361         if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
362                 BEGIN_BCI(2);
363                 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
364                 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
365                 dev_priv->state.common.vbaddr = dmabuf->bus_address;
366         }
367         if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
368                 /* Workaround for what looks like a hardware bug. If a
369                  * WAIT_3D_IDLE was emitted some time before the
370                  * indexed drawing command then the engine will lock
371                  * up. There are two known workarounds:
372                  * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
373                 BEGIN_BCI(63);
374                 for (i = 0; i < 63; ++i)
375                         BCI_WRITE(BCI_CMD_WAIT);
376                 dev_priv->waiting = 0;
377         }
378
379         prim <<= 25;
380         while (n != 0) {
381                 /* Can emit up to 255 indices (85 triangles) at once. */
382                 unsigned int count = n > 255 ? 255 : n;
383                 if (reorder) {
384                         /* Need to reorder indices for correct flat
385                          * shading while preserving the clock sense
386                          * for correct culling. Only on Savage3D. */
387                         int reorder[3] = { -1, -1, -1 };
388                         reorder[start % 3] = 2;
389
390                         BEGIN_BCI((count + 1 + 1) / 2);
391                         BCI_DRAW_INDICES_S3D(count, prim, start + 2);
392
393                         for (i = start + 1; i + 1 < start + count; i += 2)
394                                 BCI_WRITE((i + reorder[i % 3]) |
395                                           ((i + 1 +
396                                             reorder[(i + 1) % 3]) << 16));
397                         if (i < start + count)
398                                 BCI_WRITE(i + reorder[i % 3]);
399                 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
400                         BEGIN_BCI((count + 1 + 1) / 2);
401                         BCI_DRAW_INDICES_S3D(count, prim, start);
402
403                         for (i = start + 1; i + 1 < start + count; i += 2)
404                                 BCI_WRITE(i | ((i + 1) << 16));
405                         if (i < start + count)
406                                 BCI_WRITE(i);
407                 } else {
408                         BEGIN_BCI((count + 2 + 1) / 2);
409                         BCI_DRAW_INDICES_S4(count, prim, skip);
410
411                         for (i = start; i + 1 < start + count; i += 2)
412                                 BCI_WRITE(i | ((i + 1) << 16));
413                         if (i < start + count)
414                                 BCI_WRITE(i);
415                 }
416
417                 start += count;
418                 n -= count;
419
420                 prim |= BCI_CMD_DRAW_CONT;
421         }
422
423         return 0;
424 }
425
426 static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
427                                    const drm_savage_cmd_header_t * cmd_header,
428                                    const uint32_t *vtxbuf, unsigned int vb_size,
429                                    unsigned int vb_stride)
430 {
431         unsigned char reorder = 0;
432         unsigned int prim = cmd_header->prim.prim;
433         unsigned int skip = cmd_header->prim.skip;
434         unsigned int n = cmd_header->prim.count;
435         unsigned int start = cmd_header->prim.start;
436         unsigned int vtx_size;
437         unsigned int i;
438         DMA_LOCALS;
439
440         if (!n)
441                 return 0;
442
443         switch (prim) {
444         case SAVAGE_PRIM_TRILIST_201:
445                 reorder = 1;
446                 prim = SAVAGE_PRIM_TRILIST;
447                 fallthrough;
448         case SAVAGE_PRIM_TRILIST:
449                 if (n % 3 != 0) {
450                         DRM_ERROR("wrong number of vertices %u in TRILIST\n",
451                                   n);
452                         return -EINVAL;
453                 }
454                 break;
455         case SAVAGE_PRIM_TRISTRIP:
456         case SAVAGE_PRIM_TRIFAN:
457                 if (n < 3) {
458                         DRM_ERROR
459                             ("wrong number of vertices %u in TRIFAN/STRIP\n",
460                              n);
461                         return -EINVAL;
462                 }
463                 break;
464         default:
465                 DRM_ERROR("invalid primitive type %u\n", prim);
466                 return -EINVAL;
467         }
468
469         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
470                 if (skip > SAVAGE_SKIP_ALL_S3D) {
471                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
472                         return -EINVAL;
473                 }
474                 vtx_size = 8;   /* full vertex */
475         } else {
476                 if (skip > SAVAGE_SKIP_ALL_S4) {
477                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
478                         return -EINVAL;
479                 }
480                 vtx_size = 10;  /* full vertex */
481         }
482
483         vtx_size -= (skip & 1) + (skip >> 1 & 1) +
484             (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
485             (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
486
487         if (vtx_size > vb_stride) {
488                 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
489                           vtx_size, vb_stride);
490                 return -EINVAL;
491         }
492
493         if (start + n > vb_size / (vb_stride * 4)) {
494                 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
495                           start, start + n - 1, vb_size / (vb_stride * 4));
496                 return -EINVAL;
497         }
498
499         prim <<= 25;
500         while (n != 0) {
501                 /* Can emit up to 255 vertices (85 triangles) at once. */
502                 unsigned int count = n > 255 ? 255 : n;
503                 if (reorder) {
504                         /* Need to reorder vertices for correct flat
505                          * shading while preserving the clock sense
506                          * for correct culling. Only on Savage3D. */
507                         int reorder[3] = { -1, -1, -1 };
508                         reorder[start % 3] = 2;
509
510                         BEGIN_DMA(count * vtx_size + 1);
511                         DMA_DRAW_PRIMITIVE(count, prim, skip);
512
513                         for (i = start; i < start + count; ++i) {
514                                 unsigned int j = i + reorder[i % 3];
515                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
516                         }
517
518                         DMA_COMMIT();
519                 } else {
520                         BEGIN_DMA(count * vtx_size + 1);
521                         DMA_DRAW_PRIMITIVE(count, prim, skip);
522
523                         if (vb_stride == vtx_size) {
524                                 DMA_COPY(&vtxbuf[vb_stride * start],
525                                          vtx_size * count);
526                         } else {
527                                 for (i = start; i < start + count; ++i) {
528                                         DMA_COPY(&vtxbuf [vb_stride * i],
529                                                  vtx_size);
530                                 }
531                         }
532
533                         DMA_COMMIT();
534                 }
535
536                 start += count;
537                 n -= count;
538
539                 prim |= BCI_CMD_DRAW_CONT;
540         }
541
542         return 0;
543 }
544
545 static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
546                                    const drm_savage_cmd_header_t * cmd_header,
547                                    const uint16_t *idx,
548                                    const struct drm_buf * dmabuf)
549 {
550         unsigned char reorder = 0;
551         unsigned int prim = cmd_header->idx.prim;
552         unsigned int skip = cmd_header->idx.skip;
553         unsigned int n = cmd_header->idx.count;
554         unsigned int i;
555         BCI_LOCALS;
556
557         if (!dmabuf) {
558                 DRM_ERROR("called without dma buffers!\n");
559                 return -EINVAL;
560         }
561
562         if (!n)
563                 return 0;
564
565         switch (prim) {
566         case SAVAGE_PRIM_TRILIST_201:
567                 reorder = 1;
568                 prim = SAVAGE_PRIM_TRILIST;
569                 fallthrough;
570         case SAVAGE_PRIM_TRILIST:
571                 if (n % 3 != 0) {
572                         DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
573                         return -EINVAL;
574                 }
575                 break;
576         case SAVAGE_PRIM_TRISTRIP:
577         case SAVAGE_PRIM_TRIFAN:
578                 if (n < 3) {
579                         DRM_ERROR
580                             ("wrong number of indices %u in TRIFAN/STRIP\n", n);
581                         return -EINVAL;
582                 }
583                 break;
584         default:
585                 DRM_ERROR("invalid primitive type %u\n", prim);
586                 return -EINVAL;
587         }
588
589         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
590                 if (skip != 0) {
591                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
592                         return -EINVAL;
593                 }
594         } else {
595                 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
596                     (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
597                     (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
598                 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
599                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
600                         return -EINVAL;
601                 }
602                 if (reorder) {
603                         DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
604                         return -EINVAL;
605                 }
606         }
607
608         /* Vertex DMA doesn't work with command DMA at the same time,
609          * so we use BCI_... to submit commands here. Flush buffered
610          * faked DMA first. */
611         DMA_FLUSH();
612
613         if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
614                 BEGIN_BCI(2);
615                 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
616                 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
617                 dev_priv->state.common.vbaddr = dmabuf->bus_address;
618         }
619         if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
620                 /* Workaround for what looks like a hardware bug. If a
621                  * WAIT_3D_IDLE was emitted some time before the
622                  * indexed drawing command then the engine will lock
623                  * up. There are two known workarounds:
624                  * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
625                 BEGIN_BCI(63);
626                 for (i = 0; i < 63; ++i)
627                         BCI_WRITE(BCI_CMD_WAIT);
628                 dev_priv->waiting = 0;
629         }
630
631         prim <<= 25;
632         while (n != 0) {
633                 /* Can emit up to 255 indices (85 triangles) at once. */
634                 unsigned int count = n > 255 ? 255 : n;
635
636                 /* check indices */
637                 for (i = 0; i < count; ++i) {
638                         if (idx[i] > dmabuf->total / 32) {
639                                 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
640                                           i, idx[i], dmabuf->total / 32);
641                                 return -EINVAL;
642                         }
643                 }
644
645                 if (reorder) {
646                         /* Need to reorder indices for correct flat
647                          * shading while preserving the clock sense
648                          * for correct culling. Only on Savage3D. */
649                         int reorder[3] = { 2, -1, -1 };
650
651                         BEGIN_BCI((count + 1 + 1) / 2);
652                         BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
653
654                         for (i = 1; i + 1 < count; i += 2)
655                                 BCI_WRITE(idx[i + reorder[i % 3]] |
656                                           (idx[i + 1 +
657                                            reorder[(i + 1) % 3]] << 16));
658                         if (i < count)
659                                 BCI_WRITE(idx[i + reorder[i % 3]]);
660                 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
661                         BEGIN_BCI((count + 1 + 1) / 2);
662                         BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
663
664                         for (i = 1; i + 1 < count; i += 2)
665                                 BCI_WRITE(idx[i] | (idx[i + 1] << 16));
666                         if (i < count)
667                                 BCI_WRITE(idx[i]);
668                 } else {
669                         BEGIN_BCI((count + 2 + 1) / 2);
670                         BCI_DRAW_INDICES_S4(count, prim, skip);
671
672                         for (i = 0; i + 1 < count; i += 2)
673                                 BCI_WRITE(idx[i] | (idx[i + 1] << 16));
674                         if (i < count)
675                                 BCI_WRITE(idx[i]);
676                 }
677
678                 idx += count;
679                 n -= count;
680
681                 prim |= BCI_CMD_DRAW_CONT;
682         }
683
684         return 0;
685 }
686
687 static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
688                                   const drm_savage_cmd_header_t * cmd_header,
689                                   const uint16_t *idx,
690                                   const uint32_t *vtxbuf,
691                                   unsigned int vb_size, unsigned int vb_stride)
692 {
693         unsigned char reorder = 0;
694         unsigned int prim = cmd_header->idx.prim;
695         unsigned int skip = cmd_header->idx.skip;
696         unsigned int n = cmd_header->idx.count;
697         unsigned int vtx_size;
698         unsigned int i;
699         DMA_LOCALS;
700
701         if (!n)
702                 return 0;
703
704         switch (prim) {
705         case SAVAGE_PRIM_TRILIST_201:
706                 reorder = 1;
707                 prim = SAVAGE_PRIM_TRILIST;
708                 fallthrough;
709         case SAVAGE_PRIM_TRILIST:
710                 if (n % 3 != 0) {
711                         DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
712                         return -EINVAL;
713                 }
714                 break;
715         case SAVAGE_PRIM_TRISTRIP:
716         case SAVAGE_PRIM_TRIFAN:
717                 if (n < 3) {
718                         DRM_ERROR
719                             ("wrong number of indices %u in TRIFAN/STRIP\n", n);
720                         return -EINVAL;
721                 }
722                 break;
723         default:
724                 DRM_ERROR("invalid primitive type %u\n", prim);
725                 return -EINVAL;
726         }
727
728         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
729                 if (skip > SAVAGE_SKIP_ALL_S3D) {
730                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
731                         return -EINVAL;
732                 }
733                 vtx_size = 8;   /* full vertex */
734         } else {
735                 if (skip > SAVAGE_SKIP_ALL_S4) {
736                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);
737                         return -EINVAL;
738                 }
739                 vtx_size = 10;  /* full vertex */
740         }
741
742         vtx_size -= (skip & 1) + (skip >> 1 & 1) +
743             (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
744             (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
745
746         if (vtx_size > vb_stride) {
747                 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
748                           vtx_size, vb_stride);
749                 return -EINVAL;
750         }
751
752         prim <<= 25;
753         while (n != 0) {
754                 /* Can emit up to 255 vertices (85 triangles) at once. */
755                 unsigned int count = n > 255 ? 255 : n;
756
757                 /* Check indices */
758                 for (i = 0; i < count; ++i) {
759                         if (idx[i] > vb_size / (vb_stride * 4)) {
760                                 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
761                                           i, idx[i], vb_size / (vb_stride * 4));
762                                 return -EINVAL;
763                         }
764                 }
765
766                 if (reorder) {
767                         /* Need to reorder vertices for correct flat
768                          * shading while preserving the clock sense
769                          * for correct culling. Only on Savage3D. */
770                         int reorder[3] = { 2, -1, -1 };
771
772                         BEGIN_DMA(count * vtx_size + 1);
773                         DMA_DRAW_PRIMITIVE(count, prim, skip);
774
775                         for (i = 0; i < count; ++i) {
776                                 unsigned int j = idx[i + reorder[i % 3]];
777                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
778                         }
779
780                         DMA_COMMIT();
781                 } else {
782                         BEGIN_DMA(count * vtx_size + 1);
783                         DMA_DRAW_PRIMITIVE(count, prim, skip);
784
785                         for (i = 0; i < count; ++i) {
786                                 unsigned int j = idx[i];
787                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
788                         }
789
790                         DMA_COMMIT();
791                 }
792
793                 idx += count;
794                 n -= count;
795
796                 prim |= BCI_CMD_DRAW_CONT;
797         }
798
799         return 0;
800 }
801
802 static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
803                                  const drm_savage_cmd_header_t * cmd_header,
804                                  const drm_savage_cmd_header_t *data,
805                                  unsigned int nbox,
806                                  const struct drm_clip_rect *boxes)
807 {
808         unsigned int flags = cmd_header->clear0.flags;
809         unsigned int clear_cmd;
810         unsigned int i, nbufs;
811         DMA_LOCALS;
812
813         if (nbox == 0)
814                 return 0;
815
816         clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
817             BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
818         BCI_CMD_SET_ROP(clear_cmd, 0xCC);
819
820         nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
821             ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
822         if (nbufs == 0)
823                 return 0;
824
825         if (data->clear1.mask != 0xffffffff) {
826                 /* set mask */
827                 BEGIN_DMA(2);
828                 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
829                 DMA_WRITE(data->clear1.mask);
830                 DMA_COMMIT();
831         }
832         for (i = 0; i < nbox; ++i) {
833                 unsigned int x, y, w, h;
834                 unsigned int buf;
835                 x = boxes[i].x1, y = boxes[i].y1;
836                 w = boxes[i].x2 - boxes[i].x1;
837                 h = boxes[i].y2 - boxes[i].y1;
838                 BEGIN_DMA(nbufs * 6);
839                 for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
840                         if (!(flags & buf))
841                                 continue;
842                         DMA_WRITE(clear_cmd);
843                         switch (buf) {
844                         case SAVAGE_FRONT:
845                                 DMA_WRITE(dev_priv->front_offset);
846                                 DMA_WRITE(dev_priv->front_bd);
847                                 break;
848                         case SAVAGE_BACK:
849                                 DMA_WRITE(dev_priv->back_offset);
850                                 DMA_WRITE(dev_priv->back_bd);
851                                 break;
852                         case SAVAGE_DEPTH:
853                                 DMA_WRITE(dev_priv->depth_offset);
854                                 DMA_WRITE(dev_priv->depth_bd);
855                                 break;
856                         }
857                         DMA_WRITE(data->clear1.value);
858                         DMA_WRITE(BCI_X_Y(x, y));
859                         DMA_WRITE(BCI_W_H(w, h));
860                 }
861                 DMA_COMMIT();
862         }
863         if (data->clear1.mask != 0xffffffff) {
864                 /* reset mask */
865                 BEGIN_DMA(2);
866                 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
867                 DMA_WRITE(0xffffffff);
868                 DMA_COMMIT();
869         }
870
871         return 0;
872 }
873
874 static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
875                                 unsigned int nbox, const struct drm_clip_rect *boxes)
876 {
877         unsigned int swap_cmd;
878         unsigned int i;
879         DMA_LOCALS;
880
881         if (nbox == 0)
882                 return 0;
883
884         swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
885             BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
886         BCI_CMD_SET_ROP(swap_cmd, 0xCC);
887
888         for (i = 0; i < nbox; ++i) {
889                 BEGIN_DMA(6);
890                 DMA_WRITE(swap_cmd);
891                 DMA_WRITE(dev_priv->back_offset);
892                 DMA_WRITE(dev_priv->back_bd);
893                 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
894                 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
895                 DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
896                                   boxes[i].y2 - boxes[i].y1));
897                 DMA_COMMIT();
898         }
899
900         return 0;
901 }
902
903 static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
904                                 const drm_savage_cmd_header_t *start,
905                                 const drm_savage_cmd_header_t *end,
906                                 const struct drm_buf * dmabuf,
907                                 const unsigned int *vtxbuf,
908                                 unsigned int vb_size, unsigned int vb_stride,
909                                 unsigned int nbox,
910                                 const struct drm_clip_rect *boxes)
911 {
912         unsigned int i, j;
913         int ret;
914
915         for (i = 0; i < nbox; ++i) {
916                 const drm_savage_cmd_header_t *cmdbuf;
917                 dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
918
919                 cmdbuf = start;
920                 while (cmdbuf < end) {
921                         drm_savage_cmd_header_t cmd_header;
922                         cmd_header = *cmdbuf;
923                         cmdbuf++;
924                         switch (cmd_header.cmd.cmd) {
925                         case SAVAGE_CMD_DMA_PRIM:
926                                 ret = savage_dispatch_dma_prim(
927                                         dev_priv, &cmd_header, dmabuf);
928                                 break;
929                         case SAVAGE_CMD_VB_PRIM:
930                                 ret = savage_dispatch_vb_prim(
931                                         dev_priv, &cmd_header,
932                                         vtxbuf, vb_size, vb_stride);
933                                 break;
934                         case SAVAGE_CMD_DMA_IDX:
935                                 j = (cmd_header.idx.count + 3) / 4;
936                                 /* j was check in savage_bci_cmdbuf */
937                                 ret = savage_dispatch_dma_idx(dev_priv,
938                                         &cmd_header, (const uint16_t *)cmdbuf,
939                                         dmabuf);
940                                 cmdbuf += j;
941                                 break;
942                         case SAVAGE_CMD_VB_IDX:
943                                 j = (cmd_header.idx.count + 3) / 4;
944                                 /* j was check in savage_bci_cmdbuf */
945                                 ret = savage_dispatch_vb_idx(dev_priv,
946                                         &cmd_header, (const uint16_t *)cmdbuf,
947                                         (const uint32_t *)vtxbuf, vb_size,
948                                         vb_stride);
949                                 cmdbuf += j;
950                                 break;
951                         default:
952                                 /* What's the best return code? EFAULT? */
953                                 DRM_ERROR("IMPLEMENTATION ERROR: "
954                                           "non-drawing-command %d\n",
955                                           cmd_header.cmd.cmd);
956                                 return -EINVAL;
957                         }
958
959                         if (ret != 0)
960                                 return ret;
961                 }
962         }
963
964         return 0;
965 }
966
967 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
968 {
969         drm_savage_private_t *dev_priv = dev->dev_private;
970         struct drm_device_dma *dma = dev->dma;
971         struct drm_buf *dmabuf;
972         drm_savage_cmdbuf_t *cmdbuf = data;
973         drm_savage_cmd_header_t *kcmd_addr = NULL;
974         drm_savage_cmd_header_t *first_draw_cmd;
975         unsigned int *kvb_addr = NULL;
976         struct drm_clip_rect *kbox_addr = NULL;
977         unsigned int i, j;
978         int ret = 0;
979
980         DRM_DEBUG("\n");
981
982         LOCK_TEST_WITH_RETURN(dev, file_priv);
983
984         if (dma && dma->buflist) {
985                 if (cmdbuf->dma_idx >= dma->buf_count) {
986                         DRM_ERROR
987                             ("vertex buffer index %u out of range (0-%u)\n",
988                              cmdbuf->dma_idx, dma->buf_count - 1);
989                         return -EINVAL;
990                 }
991                 dmabuf = dma->buflist[cmdbuf->dma_idx];
992         } else {
993                 dmabuf = NULL;
994         }
995
996         /* Copy the user buffers into kernel temporary areas.  This hasn't been
997          * a performance loss compared to VERIFYAREA_READ/
998          * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
999          * for locking on FreeBSD.
1000          */
1001         if (cmdbuf->size) {
1002                 kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
1003                 if (kcmd_addr == NULL)
1004                         return -ENOMEM;
1005
1006                 if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
1007                                        cmdbuf->size * 8))
1008                 {
1009                         kfree(kcmd_addr);
1010                         return -EFAULT;
1011                 }
1012                 cmdbuf->cmd_addr = kcmd_addr;
1013         }
1014         if (cmdbuf->vb_size) {
1015                 kvb_addr = memdup_user(cmdbuf->vb_addr, cmdbuf->vb_size);
1016                 if (IS_ERR(kvb_addr)) {
1017                         ret = PTR_ERR(kvb_addr);
1018                         kvb_addr = NULL;
1019                         goto done;
1020                 }
1021                 cmdbuf->vb_addr = kvb_addr;
1022         }
1023         if (cmdbuf->nbox) {
1024                 kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
1025                                           GFP_KERNEL);
1026                 if (kbox_addr == NULL) {
1027                         ret = -ENOMEM;
1028                         goto done;
1029                 }
1030
1031                 if (copy_from_user(kbox_addr, cmdbuf->box_addr,
1032                                        cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
1033                         ret = -EFAULT;
1034                         goto done;
1035                 }
1036         cmdbuf->box_addr = kbox_addr;
1037         }
1038
1039         /* Make sure writes to DMA buffers are finished before sending
1040          * DMA commands to the graphics hardware. */
1041         mb();
1042
1043         /* Coming from user space. Don't know if the Xserver has
1044          * emitted wait commands. Assuming the worst. */
1045         dev_priv->waiting = 1;
1046
1047         i = 0;
1048         first_draw_cmd = NULL;
1049         while (i < cmdbuf->size) {
1050                 drm_savage_cmd_header_t cmd_header;
1051                 cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
1052                 cmdbuf->cmd_addr++;
1053                 i++;
1054
1055                 /* Group drawing commands with same state to minimize
1056                  * iterations over clip rects. */
1057                 j = 0;
1058                 switch (cmd_header.cmd.cmd) {
1059                 case SAVAGE_CMD_DMA_IDX:
1060                 case SAVAGE_CMD_VB_IDX:
1061                         j = (cmd_header.idx.count + 3) / 4;
1062                         if (i + j > cmdbuf->size) {
1063                                 DRM_ERROR("indexed drawing command extends "
1064                                           "beyond end of command buffer\n");
1065                                 DMA_FLUSH();
1066                                 ret = -EINVAL;
1067                                 goto done;
1068                         }
1069                         fallthrough;
1070                 case SAVAGE_CMD_DMA_PRIM:
1071                 case SAVAGE_CMD_VB_PRIM:
1072                         if (!first_draw_cmd)
1073                                 first_draw_cmd = cmdbuf->cmd_addr - 1;
1074                         cmdbuf->cmd_addr += j;
1075                         i += j;
1076                         break;
1077                 default:
1078                         if (first_draw_cmd) {
1079                                 ret = savage_dispatch_draw(
1080                                       dev_priv, first_draw_cmd,
1081                                       cmdbuf->cmd_addr - 1,
1082                                       dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
1083                                       cmdbuf->vb_stride,
1084                                       cmdbuf->nbox, cmdbuf->box_addr);
1085                                 if (ret != 0)
1086                                         goto done;
1087                                 first_draw_cmd = NULL;
1088                         }
1089                 }
1090                 if (first_draw_cmd)
1091                         continue;
1092
1093                 switch (cmd_header.cmd.cmd) {
1094                 case SAVAGE_CMD_STATE:
1095                         j = (cmd_header.state.count + 1) / 2;
1096                         if (i + j > cmdbuf->size) {
1097                                 DRM_ERROR("command SAVAGE_CMD_STATE extends "
1098                                           "beyond end of command buffer\n");
1099                                 DMA_FLUSH();
1100                                 ret = -EINVAL;
1101                                 goto done;
1102                         }
1103                         ret = savage_dispatch_state(dev_priv, &cmd_header,
1104                                 (const uint32_t *)cmdbuf->cmd_addr);
1105                         cmdbuf->cmd_addr += j;
1106                         i += j;
1107                         break;
1108                 case SAVAGE_CMD_CLEAR:
1109                         if (i + 1 > cmdbuf->size) {
1110                                 DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
1111                                           "beyond end of command buffer\n");
1112                                 DMA_FLUSH();
1113                                 ret = -EINVAL;
1114                                 goto done;
1115                         }
1116                         ret = savage_dispatch_clear(dev_priv, &cmd_header,
1117                                                     cmdbuf->cmd_addr,
1118                                                     cmdbuf->nbox,
1119                                                     cmdbuf->box_addr);
1120                         cmdbuf->cmd_addr++;
1121                         i++;
1122                         break;
1123                 case SAVAGE_CMD_SWAP:
1124                         ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
1125                                                    cmdbuf->box_addr);
1126                         break;
1127                 default:
1128                         DRM_ERROR("invalid command 0x%x\n",
1129                                   cmd_header.cmd.cmd);
1130                         DMA_FLUSH();
1131                         ret = -EINVAL;
1132                         goto done;
1133                 }
1134
1135                 if (ret != 0) {
1136                         DMA_FLUSH();
1137                         goto done;
1138                 }
1139         }
1140
1141         if (first_draw_cmd) {
1142                 ret = savage_dispatch_draw (
1143                         dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
1144                         cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
1145                         cmdbuf->nbox, cmdbuf->box_addr);
1146                 if (ret != 0) {
1147                         DMA_FLUSH();
1148                         goto done;
1149                 }
1150         }
1151
1152         DMA_FLUSH();
1153
1154         if (dmabuf && cmdbuf->discard) {
1155                 drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
1156                 uint16_t event;
1157                 event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
1158                 SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
1159                 savage_freelist_put(dev, dmabuf);
1160         }
1161
1162 done:
1163         /* If we didn't need to allocate them, these'll be NULL */
1164         kfree(kcmd_addr);
1165         kfree(kvb_addr);
1166         kfree(kbox_addr);
1167
1168         return ret;
1169 }