drivers/gpu/drm/i915/gt/shaders/clear_kernel/ivb.asm

   1 // SPDX-License-Identifier: MIT
   2 /*
   3  * Copyright © 2020 Intel Corporation
   4  */
   5
   6 /*
   7  * Kernel for PAVP buffer clear.
   8  *
   9  *      1. Clear all 64 GRF registers assigned to the kernel with designated value;
  10  *      2. Write 32x16 block of all "0" to render target buffer which indirectly clears
  11  *         512 bytes of Render Cache.
  12  */
  13
  14 /* Store designated "clear GRF" value: latch the Clear Word (curbe DW 1.1 [15:0], read as UW sub-register g1.2) into flag register f0.1 */
  15 mov(1)          f0.1<1>UW       g1.2<0,1,0>UW                   { align1 1N };
  16
  17 /**
  18  * Curbe Format
  19  *
  20  * DW 1.0 - Block Offset to write Render Cache
  21  * DW 1.1 [15:0] - Clear Word
  22  * DW 1.2 - Delay iterations
  23  * DW 1.3 - Enable Instrumentation (only for debug)
  24  * DW 1.4 - Rsvd (intended for context ID)
  25  * DW 1.5 - [31:16]:SliceCount, [15:0]:SubSlicePerSliceCount
  26  * DW 1.6 - Rsvd MBZ (intended for Enable Wait on Total Thread Count)
  27  * DW 1.7 - Rsvd MBZ (intended for Total Thread Count)
  28  *
  29  * Binding Table
  30  *
  31  * BTI 0: 2D Surface to help clear L3 (Render/Data Cache)
  32  * BTI 1: Wait/Instrumentation Buffer
  33  *  Size : (SliceCount * SubSliceCount  * 16 EUs/SubSlice) rows * (16 threads/EU) cols (Format R32_UINT)
  34  *         Expected to be initialized to 0 by driver/another kernel
  35  *  Layout :
  36  *           RowN: Histogram for EU-N: (SliceID*SubSlicePerSliceCount + SSID)*16 + EUID [assume max 16 EUs / SS]
  37  *           Col-k[DW-k]: Threads Executed on ThreadID-k for EU-N
  38  */
  39 add(1)          g1.2<1>UD       g1.2<0,1,0>UD   0x00000001UD    { align1 1N }; /* Loop count to delay kernel: Init to (g1.2 + 1) */
  40 cmp.z.f0.0(1)   null<1>UD       g1.3<0,1,0>UD   0x00000000UD    { align1 1N }; /* f0.0 = (Enable Instrumentation, DW 1.3, == 0) */
  41 (+f0.0) jmpi(1) 44D                                             { align1 WE_all 1N }; /* Instrumentation disabled: jump past the histogram update; NOTE(review): 44D presumably lands on the delay loop's add.nz - confirm jmpi offset units */
  42
  43 /**
  44  * State Register has info on where this thread is running
  45  *      IVB: sr0.0 :: [15:13]: MBZ, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
  46  *      HSW: sr0.0 :: 15: MBZ, [14:13]: SliceID, 12: HSID (Half-Slice ID), [11:8]EUID, [2:0] ThreadSlotID
  47  */
  48 mov(8)          g3<1>UD         0x00000000UD                    { align1 1Q }; /* Zero g3; its sub-registers are scratch for the ID arithmetic below */
  49 shr(1)          g3<1>D          sr0<0,1,0>D     12D             { align1 1N }; /* Shift sr0.0 so bit 12 (HSID) sits at bit 0 */
  50 and(1)          g3<1>D          g3<0,1,0>D      1D              { align1 1N }; /* g3 has HSID */
  51 shr(1)          g3.1<1>D        sr0<0,1,0>D     13D             { align1 1N }; /* Shift sr0.0 so bits [14:13] sit at [1:0] */
  52 and(1)          g3.1<1>D        g3.1<0,1,0>D    3D              { align1 1N }; /* g3.1 has sliceID (sr0.0 [14:13] in the HSW layout; listed as MBZ for IVB above) */
  53 mul(1)          g3.5<1>D        g3.1<0,1,0>D    g1.10<0,1,0>UW  { align1 1N }; /* g3.5 = sliceID * SubSlicePerSliceCount (DW 1.5 [15:0]) */
  54 add(1)          g3<1>D          g3<0,1,0>D      g3.5<0,1,0>D    { align1 1N }; /* g3 = sliceID * SubSlicePerSliceCount + HSID */
  55 shr(1)          g3.2<1>D        sr0<0,1,0>D     8D              { align1 1N }; /* Shift sr0.0 so bits [11:8] sit at [3:0] */
  56 and(1)          g3.2<1>D        g3.2<0,1,0>D    15D             { align1 1N }; /* g3.2 = EUID */
  57 mul(1)          g3.4<1>D        g3<0,1,0>D      16D             { align1 1N }; /* g3.4 = g3 * 16 (16 EU rows assumed per sub-slice) */
  58 add(1)          g3.2<1>D        g3.2<0,1,0>D    g3.4<0,1,0>D    { align1 1N }; /* g3.2 now points to EU row number (Y-pixel = V address )  in instrumentation surf */
  59
  60 mov(8)          g5<1>UD         0x00000000UD                    { align1 1Q }; /* Zero g5, the destination of the block read below */
  61 and(1)          g3.3<1>D        sr0<0,1,0>D     7D              { align1 1N }; /* g3.3 = ThreadSlotID (sr0.0 [2:0]) */
  62 mul(1)          g3.3<1>D        g3.3<0,1,0>D    4D              { align1 1N }; /* g3.3 = ThreadSlotID * 4: byte offset of this thread's DW column (R32_UINT entries) */
  63
  64 mov(8)          g4<1>UD         g0<8,8,1>UD                     { align1 1Q }; /* Initialize message header with g0 */
  65 mov(1)          g4<1>UD         g3.3<0,1,0>UD                   { align1 1N }; /* Block X offset: thread column, in bytes */
  66 mov(1)          g4.1<1>UD       g3.2<0,1,0>UD                   { align1 1N }; /* Block Y offset: EU row */
  67 mov(1)          g4.2<1>UD       0x00000003UD                    { align1 1N }; /* Block size (1 row x 4 bytes) */
  68 and(1)          g4.3<1>UD       g4.3<0,1,0>UW   0xffffffffUD    { align1 1N }; /* NOTE(review): source is typed UW, destination UD, mask is all ones; intent unclear - confirm against the media block message header definition */
  69
  70 /* Media block read to fetch current value at specified location in instrumentation buffer (BTI 1); header in g4, result returned in g5 */
  71 sendc(8)        g5<1>UD         g4<8,8,1>F      0x02190001
  72                             render MsgDesc: media block read MsgCtrl = 0x0 Surface = 1 mlen 1 rlen 1 { align1 1Q };
  73 add(1)          g5<1>D          g5<0,1,0>D      1D              { align1 1N }; /* Increment the counter that was just read */
  74
  75 /* Media block write for updated value at specified location in instrumentation buffer (mlen 2: header g4 followed by g5) */
  76 sendc(8)        g5<1>UD         g4<8,8,1>F      0x040a8001
  77                             render MsgDesc: media block write MsgCtrl = 0x0 Surface = 1 mlen 2 rlen 0 { align1 1Q };
  78 /* Delay thread for specified parameter: count g1.2 (Delay iterations + 1, set up earlier) down to zero */
  79 add.nz.f0.0(1)  g1.2<1>UD       g1.2<0,1,0>UD   -1D             { align1 1N }; /* Decrement; f0.0 is set while the result is non-zero */
  80 (+f0.0) jmpi(1) -4D                                             { align1 WE_all 1N }; /* Repeat the decrement while f0.0 is set */
  81
  82 /* Store designated "clear GRF" value: same move as at the top of the kernel, f0.1 = Clear Word (DW 1.1 [15:0]) */
  83 mov(1)          f0.1<1>UW       g1.2<0,1,0>UW                   { align1 1N };
  84
  85 /* Initialize looping parameters for the GRF clear loop near the end of the kernel */
  86 mov(1)          a0<1>D          0D                              { align1 1N }; /* Initialize a0.0:w=0 */
  87 mov(1)          a0.4<1>W        127W                            { align1 1N }; /* Loop count. NOTE(review): original comment said "16 GRF's" per loop; the loop body stores 16 UW via mov(16) and steps a0.0 by 32 */
  88
  89 /* Write 32x16 all "0" block to BTI 0 as two 16x16 media block writes, the second one 16 bytes further along X */
  90 mov(8)          g2<1>UD         g0<8,8,1>UD                     { align1 1Q }; /* Initialize message header g2 with g0 */
  91 mov(8)          g127<1>UD       g0<8,8,1>UD                     { align1 1Q }; /* Copy g0 into g127, the payload of the final EOT send */
  92 mov(2)          g2<1>UD         g1<2,2,1>UW                     { align1 1N }; /* g2.0 / g2.1 = first two words of g1 (Block Offset, DW 1.0), zero-extended to UD */
  93 mov(1)          g2.2<1>UD       0x000f000fUD                    { align1 1N }; /* Block size (16x16) */
  94 and(1)          g2.3<1>UD       g2.3<0,1,0>UW   0xffffffefUD    { align1 1N }; /* NOTE(review): mask clears bit 4; source is typed UW while destination is UD - confirm intent against the media block message header definition */
  95 mov(16)         g3<1>UD         0x00000000UD                    { align1 1H }; /* g3..g10 below: zero the payload registers (mlen 9 = header g2 + 8 payload registers) */
  96 mov(16)         g4<1>UD         0x00000000UD                    { align1 1H };
  97 mov(16)         g5<1>UD         0x00000000UD                    { align1 1H };
  98 mov(16)         g6<1>UD         0x00000000UD                    { align1 1H };
  99 mov(16)         g7<1>UD         0x00000000UD                    { align1 1H };
 100 mov(16)         g8<1>UD         0x00000000UD                    { align1 1H };
 101 mov(16)         g9<1>UD         0x00000000UD                    { align1 1H };
 102 mov(16)         g10<1>UD        0x00000000UD                    { align1 1H };
 103 sendc(8)        null<1>UD       g2<8,8,1>F      0x120a8000
 104                             render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; /* First 16x16 block at the curbe-supplied offset */
 105 add(1)          g2<1>UD         g1<0,1,0>UW     0x0010UW        { align1 1N }; /* g2.0 = first word of g1 + 16: move the X offset to the second half of the block */
 106 sendc(8)        null<1>UD       g2<8,8,1>F      0x120a8000
 107                             render MsgDesc: media block write MsgCtrl = 0x0 Surface = 0 mlen 9 rlen 0 { align1 1Q }; /* Second 16x16 block */
 108
 109 /* Now, clear all GRF registers: store the clear word (f0.1) through the a0.0 index, 32 bytes per iteration, a0.4 iterations */
 110 add.nz.f0.0(1)  a0.4<1>W        a0.4<0,1,0>W    -1W             { align1 1N }; /* Decrement loop count; f0.0 is set while the result is non-zero */
 111 mov(16)         g[a0]<1>UW      f0.1<0,1,0>UW                   { align1 1H }; /* Broadcast the clear word into 16 consecutive words at the register offset held in a0.0 */
 112 add(1)          a0<1>D          a0<0,1,0>D      32D             { align1 1N }; /* Advance a0.0 by 32 (16 words x 2 bytes); no conditional modifier, so f0.0 is untouched */
 113 (+f0.0) jmpi(1) -8D                                             { align1 WE_all 1N }; /* Loop while the count from the add.nz above was non-zero */
 114
 115 /* Terminate the thread: EOT message to the thread spawner, payload g127 (loaded from g0 earlier in the kernel) */
 116 sendc(8)        null<1>UD       g127<8,8,1>F    0x82000010
 117                             thread_spawner MsgDesc: mlen 1 rlen 0           { align1 1Q EOT };