Merge tag 'hardening-v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdkfd / kfd_dbgdev.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/types.h>
25 #include <linux/kernel.h>
26 #include <linux/log2.h>
27 #include <linux/sched.h>
28 #include <linux/slab.h>
29 #include <linux/mutex.h>
30 #include <linux/device.h>
31
32 #include "kfd_pm4_headers.h"
33 #include "kfd_pm4_headers_diq.h"
34 #include "kfd_kernel_queue.h"
35 #include "kfd_priv.h"
36 #include "kfd_pm4_opcodes.h"
37 #include "cik_regs.h"
38 #include "kfd_dbgmgr.h"
39 #include "kfd_dbgdev.h"
40 #include "kfd_device_queue_manager.h"
41
42 static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
43 {
44         dev->kfd2kgd->address_watch_disable(dev->kgd);
45 }
46
47 static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
48                                 u32 pasid, uint64_t vmid0_address,
49                                 uint32_t *packet_buff, size_t size_in_bytes)
50 {
51         struct pm4__release_mem *rm_packet;
52         struct pm4__indirect_buffer_pasid *ib_packet;
53         struct kfd_mem_obj *mem_obj;
54         size_t pq_packets_size_in_bytes;
55         union ULARGE_INTEGER *largep;
56         union ULARGE_INTEGER addr;
57         struct kernel_queue *kq;
58         uint64_t *rm_state;
59         unsigned int *ib_packet_buff;
60         int status;
61
62         if (WARN_ON(!size_in_bytes))
63                 return -EINVAL;
64
65         kq = dbgdev->kq;
66
67         pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
68                                 sizeof(struct pm4__indirect_buffer_pasid);
69
70         /*
71          * We acquire a buffer from DIQ
72          * The receive packet buff will be sitting on the Indirect Buffer
73          * and in the PQ we put the IB packet + sync packet(s).
74          */
75         status = kq_acquire_packet_buffer(kq,
76                                 pq_packets_size_in_bytes / sizeof(uint32_t),
77                                 &ib_packet_buff);
78         if (status) {
79                 pr_err("kq_acquire_packet_buffer failed\n");
80                 return status;
81         }
82
83         memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
84
85         ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
86
87         ib_packet->header.count = 3;
88         ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
89         ib_packet->header.type = PM4_TYPE_3;
90
91         largep = (union ULARGE_INTEGER *) &vmid0_address;
92
93         ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
94         ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
95
96         ib_packet->control = (1 << 23) | (1 << 31) |
97                         ((size_in_bytes / 4) & 0xfffff);
98
99         ib_packet->bitfields5.pasid = pasid;
100
101         /*
102          * for now we use release mem for GPU-CPU synchronization
103          * Consider WaitRegMem + WriteData as a better alternative
104          * we get a GART allocations ( gpu/cpu mapping),
105          * for the sync variable, and wait until:
106          * (a) Sync with HW
107          * (b) Sync var is written by CP to mem.
108          */
109         rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
110                         (sizeof(struct pm4__indirect_buffer_pasid) /
111                                         sizeof(unsigned int)));
112
113         status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
114                                         &mem_obj);
115
116         if (status) {
117                 pr_err("Failed to allocate GART memory\n");
118                 kq_rollback_packet(kq);
119                 return status;
120         }
121
122         rm_state = (uint64_t *) mem_obj->cpu_ptr;
123
124         *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
125
126         rm_packet->header.opcode = IT_RELEASE_MEM;
127         rm_packet->header.type = PM4_TYPE_3;
128         rm_packet->header.count = sizeof(struct pm4__release_mem) / 4 - 2;
129
130         rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
131         rm_packet->bitfields2.event_index =
132                                 event_index___release_mem__end_of_pipe;
133
134         rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
135         rm_packet->bitfields2.atc = 0;
136         rm_packet->bitfields2.tc_wb_action_ena = 1;
137
138         addr.quad_part = mem_obj->gpu_addr;
139
140         rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
141         rm_packet->address_hi = addr.u.high_part;
142
143         rm_packet->bitfields3.data_sel =
144                                 data_sel___release_mem__send_64_bit_data;
145
146         rm_packet->bitfields3.int_sel =
147                         int_sel___release_mem__send_data_after_write_confirm;
148
149         rm_packet->bitfields3.dst_sel =
150                         dst_sel___release_mem__memory_controller;
151
152         rm_packet->data_lo = QUEUESTATE__ACTIVE;
153
154         kq_submit_packet(kq);
155
156         /* Wait till CP writes sync code: */
157         status = amdkfd_fence_wait_timeout(
158                         rm_state,
159                         QUEUESTATE__ACTIVE, 1500);
160
161         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
162
163         return status;
164 }
165
166 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
167 {
168         /*
169          * no action is needed in this case,
170          * just make sure diq will not be used
171          */
172
173         dbgdev->kq = NULL;
174
175         return 0;
176 }
177
178 static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
179 {
180         struct queue_properties properties;
181         unsigned int qid;
182         struct kernel_queue *kq = NULL;
183         int status;
184
185         properties.type = KFD_QUEUE_TYPE_DIQ;
186
187         status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
188                                 &properties, &qid, NULL);
189
190         if (status) {
191                 pr_err("Failed to create DIQ\n");
192                 return status;
193         }
194
195         pr_debug("DIQ Created with queue id: %d\n", qid);
196
197         kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
198
199         if (!kq) {
200                 pr_err("Error getting DIQ\n");
201                 pqm_destroy_queue(dbgdev->pqm, qid);
202                 return -EFAULT;
203         }
204
205         dbgdev->kq = kq;
206
207         return status;
208 }
209
210 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
211 {
212         /* disable watch address */
213         dbgdev_address_watch_disable_nodiq(dbgdev->dev);
214         return 0;
215 }
216
217 static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
218 {
219         /* todo - disable address watch */
220         int status;
221
222         status = pqm_destroy_queue(dbgdev->pqm,
223                         dbgdev->kq->queue->properties.queue_id);
224         dbgdev->kq = NULL;
225
226         return status;
227 }
228
229 static void dbgdev_address_watch_set_registers(
230                         const struct dbg_address_watch_info *adw_info,
231                         union TCP_WATCH_ADDR_H_BITS *addrHi,
232                         union TCP_WATCH_ADDR_L_BITS *addrLo,
233                         union TCP_WATCH_CNTL_BITS *cntl,
234                         unsigned int index, unsigned int vmid)
235 {
236         union ULARGE_INTEGER addr;
237
238         addr.quad_part = 0;
239         addrHi->u32All = 0;
240         addrLo->u32All = 0;
241         cntl->u32All = 0;
242
243         if (adw_info->watch_mask)
244                 cntl->bitfields.mask =
245                         (uint32_t) (adw_info->watch_mask[index] &
246                                         ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
247         else
248                 cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
249
250         addr.quad_part = (unsigned long long) adw_info->watch_address[index];
251
252         addrHi->bitfields.addr = addr.u.high_part &
253                                         ADDRESS_WATCH_REG_ADDHIGH_MASK;
254         addrLo->bitfields.addr =
255                         (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
256
257         cntl->bitfields.mode = adw_info->watch_mode[index];
258         cntl->bitfields.vmid = (uint32_t) vmid;
259         /* for now assume it is an ATC address */
260         cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
261
262         pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
263         pr_debug("\t\t%20s %08x\n", "set reg add high :",
264                         addrHi->bitfields.addr);
265         pr_debug("\t\t%20s %08x\n", "set reg add low :",
266                         addrLo->bitfields.addr);
267 }
268
269 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
270                                       struct dbg_address_watch_info *adw_info)
271 {
272         union TCP_WATCH_ADDR_H_BITS addrHi;
273         union TCP_WATCH_ADDR_L_BITS addrLo;
274         union TCP_WATCH_CNTL_BITS cntl;
275         struct kfd_process_device *pdd;
276         unsigned int i;
277
278         /* taking the vmid for that process on the safe way using pdd */
279         pdd = kfd_get_process_device_data(dbgdev->dev,
280                                         adw_info->process);
281         if (!pdd) {
282                 pr_err("Failed to get pdd for wave control no DIQ\n");
283                 return -EFAULT;
284         }
285
286         addrHi.u32All = 0;
287         addrLo.u32All = 0;
288         cntl.u32All = 0;
289
290         if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
291                         (adw_info->num_watch_points == 0)) {
292                 pr_err("num_watch_points is invalid\n");
293                 return -EINVAL;
294         }
295
296         if (!adw_info->watch_mode || !adw_info->watch_address) {
297                 pr_err("adw_info fields are not valid\n");
298                 return -EINVAL;
299         }
300
301         for (i = 0; i < adw_info->num_watch_points; i++) {
302                 dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
303                                                 &cntl, i, pdd->qpd.vmid);
304
305                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
306                 pr_debug("\t\t%20s %08x\n", "register index :", i);
307                 pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
308                 pr_debug("\t\t%20s %08x\n", "Address Low is :",
309                                 addrLo.bitfields.addr);
310                 pr_debug("\t\t%20s %08x\n", "Address high is :",
311                                 addrHi.bitfields.addr);
312                 pr_debug("\t\t%20s %08x\n", "Address high is :",
313                                 addrHi.bitfields.addr);
314                 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
315                                 cntl.bitfields.mask);
316                 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
317                                 cntl.bitfields.mode);
318                 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
319                                 cntl.bitfields.vmid);
320                 pr_debug("\t\t%20s %08x\n", "Control atc  is :",
321                                 cntl.bitfields.atc);
322                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
323
324                 pdd->dev->kfd2kgd->address_watch_execute(
325                                                 dbgdev->dev->kgd,
326                                                 i,
327                                                 cntl.u32All,
328                                                 addrHi.u32All,
329                                                 addrLo.u32All);
330         }
331
332         return 0;
333 }
334
335 static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
336                                     struct dbg_address_watch_info *adw_info)
337 {
338         struct pm4__set_config_reg *packets_vec;
339         union TCP_WATCH_ADDR_H_BITS addrHi;
340         union TCP_WATCH_ADDR_L_BITS addrLo;
341         union TCP_WATCH_CNTL_BITS cntl;
342         struct kfd_mem_obj *mem_obj;
343         unsigned int aw_reg_add_dword;
344         uint32_t *packet_buff_uint;
345         unsigned int i;
346         int status;
347         size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
348         /* we do not control the vmid in DIQ mode, just a place holder */
349         unsigned int vmid = 0;
350
351         addrHi.u32All = 0;
352         addrLo.u32All = 0;
353         cntl.u32All = 0;
354
355         if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
356                         (adw_info->num_watch_points == 0)) {
357                 pr_err("num_watch_points is invalid\n");
358                 return -EINVAL;
359         }
360
361         if (!adw_info->watch_mode || !adw_info->watch_address) {
362                 pr_err("adw_info fields are not valid\n");
363                 return -EINVAL;
364         }
365
366         status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
367
368         if (status) {
369                 pr_err("Failed to allocate GART memory\n");
370                 return status;
371         }
372
373         packet_buff_uint = mem_obj->cpu_ptr;
374
375         memset(packet_buff_uint, 0, ib_size);
376
377         packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
378
379         packets_vec[0].header.count = 1;
380         packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
381         packets_vec[0].header.type = PM4_TYPE_3;
382         packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
383         packets_vec[0].bitfields2.insert_vmid = 1;
384         packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
385         packets_vec[1].bitfields2.insert_vmid = 0;
386         packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
387         packets_vec[2].bitfields2.insert_vmid = 0;
388         packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
389         packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
390         packets_vec[3].bitfields2.insert_vmid = 1;
391
392         for (i = 0; i < adw_info->num_watch_points; i++) {
393                 dbgdev_address_watch_set_registers(adw_info,
394                                                 &addrHi,
395                                                 &addrLo,
396                                                 &cntl,
397                                                 i,
398                                                 vmid);
399
400                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
401                 pr_debug("\t\t%20s %08x\n", "register index :", i);
402                 pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
403                 pr_debug("\t\t%20s %p\n", "Add ptr is :",
404                                 adw_info->watch_address);
405                 pr_debug("\t\t%20s %08llx\n", "Add     is :",
406                                 adw_info->watch_address[i]);
407                 pr_debug("\t\t%20s %08x\n", "Address Low is :",
408                                 addrLo.bitfields.addr);
409                 pr_debug("\t\t%20s %08x\n", "Address high is :",
410                                 addrHi.bitfields.addr);
411                 pr_debug("\t\t%20s %08x\n", "Control Mask is :",
412                                 cntl.bitfields.mask);
413                 pr_debug("\t\t%20s %08x\n", "Control Mode is :",
414                                 cntl.bitfields.mode);
415                 pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
416                                 cntl.bitfields.vmid);
417                 pr_debug("\t\t%20s %08x\n", "Control atc  is :",
418                                 cntl.bitfields.atc);
419                 pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
420
421                 aw_reg_add_dword =
422                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
423                                         dbgdev->dev->kgd,
424                                         i,
425                                         ADDRESS_WATCH_REG_CNTL);
426
427                 packets_vec[0].bitfields2.reg_offset =
428                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
429
430                 packets_vec[0].reg_data[0] = cntl.u32All;
431
432                 aw_reg_add_dword =
433                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
434                                         dbgdev->dev->kgd,
435                                         i,
436                                         ADDRESS_WATCH_REG_ADDR_HI);
437
438                 packets_vec[1].bitfields2.reg_offset =
439                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
440                 packets_vec[1].reg_data[0] = addrHi.u32All;
441
442                 aw_reg_add_dword =
443                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
444                                         dbgdev->dev->kgd,
445                                         i,
446                                         ADDRESS_WATCH_REG_ADDR_LO);
447
448                 packets_vec[2].bitfields2.reg_offset =
449                                 aw_reg_add_dword - AMD_CONFIG_REG_BASE;
450                 packets_vec[2].reg_data[0] = addrLo.u32All;
451
452                 /* enable watch flag if address is not zero*/
453                 if (adw_info->watch_address[i] > 0)
454                         cntl.bitfields.valid = 1;
455                 else
456                         cntl.bitfields.valid = 0;
457
458                 aw_reg_add_dword =
459                                 dbgdev->dev->kfd2kgd->address_watch_get_offset(
460                                         dbgdev->dev->kgd,
461                                         i,
462                                         ADDRESS_WATCH_REG_CNTL);
463
464                 packets_vec[3].bitfields2.reg_offset =
465                                         aw_reg_add_dword - AMD_CONFIG_REG_BASE;
466                 packets_vec[3].reg_data[0] = cntl.u32All;
467
468                 status = dbgdev_diq_submit_ib(
469                                         dbgdev,
470                                         adw_info->process->pasid,
471                                         mem_obj->gpu_addr,
472                                         packet_buff_uint,
473                                         ib_size);
474
475                 if (status) {
476                         pr_err("Failed to submit IB to DIQ\n");
477                         break;
478                 }
479         }
480
481         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
482         return status;
483 }
484
485 static int dbgdev_wave_control_set_registers(
486                                 struct dbg_wave_control_info *wac_info,
487                                 union SQ_CMD_BITS *in_reg_sq_cmd,
488                                 union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
489 {
490         int status = 0;
491         union SQ_CMD_BITS reg_sq_cmd;
492         union GRBM_GFX_INDEX_BITS reg_gfx_index;
493         struct HsaDbgWaveMsgAMDGen2 *pMsg;
494
495         reg_sq_cmd.u32All = 0;
496         reg_gfx_index.u32All = 0;
497         pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
498
499         switch (wac_info->mode) {
500         /* Send command to single wave */
501         case HSA_DBG_WAVEMODE_SINGLE:
502                 /*
503                  * Limit access to the process waves only,
504                  * by setting vmid check
505                  */
506                 reg_sq_cmd.bits.check_vmid = 1;
507                 reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
508                 reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
509                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
510
511                 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
512                 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
513                 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
514
515                 break;
516
517         /* Send command to all waves with matching VMID */
518         case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
519
520                 reg_gfx_index.bits.sh_broadcast_writes = 1;
521                 reg_gfx_index.bits.se_broadcast_writes = 1;
522                 reg_gfx_index.bits.instance_broadcast_writes = 1;
523
524                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
525
526                 break;
527
528         /* Send command to all CU waves with matching VMID */
529         case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
530
531                 reg_sq_cmd.bits.check_vmid = 1;
532                 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
533
534                 reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
535                 reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
536                 reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
537
538                 break;
539
540         default:
541                 return -EINVAL;
542         }
543
544         switch (wac_info->operand) {
545         case HSA_DBG_WAVEOP_HALT:
546                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
547                 break;
548
549         case HSA_DBG_WAVEOP_RESUME:
550                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
551                 break;
552
553         case HSA_DBG_WAVEOP_KILL:
554                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
555                 break;
556
557         case HSA_DBG_WAVEOP_DEBUG:
558                 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
559                 break;
560
561         case HSA_DBG_WAVEOP_TRAP:
562                 if (wac_info->trapId < MAX_TRAPID) {
563                         reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
564                         reg_sq_cmd.bits.trap_id = wac_info->trapId;
565                 } else {
566                         status = -EINVAL;
567                 }
568                 break;
569
570         default:
571                 status = -EINVAL;
572                 break;
573         }
574
575         if (status == 0) {
576                 *in_reg_sq_cmd = reg_sq_cmd;
577                 *in_reg_gfx_index = reg_gfx_index;
578         }
579
580         return status;
581 }
582
583 static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
584                                         struct dbg_wave_control_info *wac_info)
585 {
586
587         int status;
588         union SQ_CMD_BITS reg_sq_cmd;
589         union GRBM_GFX_INDEX_BITS reg_gfx_index;
590         struct kfd_mem_obj *mem_obj;
591         uint32_t *packet_buff_uint;
592         struct pm4__set_config_reg *packets_vec;
593         size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
594
595         reg_sq_cmd.u32All = 0;
596
597         status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
598                                                         &reg_gfx_index);
599         if (status) {
600                 pr_err("Failed to set wave control registers\n");
601                 return status;
602         }
603
604         /* we do not control the VMID in DIQ, so reset it to a known value */
605         reg_sq_cmd.bits.vm_id = 0;
606
607         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
608
609         pr_debug("\t\t mode      is: %u\n", wac_info->mode);
610         pr_debug("\t\t operand   is: %u\n", wac_info->operand);
611         pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
612         pr_debug("\t\t msg value is: %u\n",
613                         wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
614         pr_debug("\t\t vmid      is: N/A\n");
615
616         pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
617         pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
618         pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
619         pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
620         pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
621         pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
622         pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
623
624         pr_debug("\t\t ibw       is : %u\n",
625                         reg_gfx_index.bitfields.instance_broadcast_writes);
626         pr_debug("\t\t ii        is : %u\n",
627                         reg_gfx_index.bitfields.instance_index);
628         pr_debug("\t\t sebw      is : %u\n",
629                         reg_gfx_index.bitfields.se_broadcast_writes);
630         pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
631         pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
632         pr_debug("\t\t sbw       is : %u\n",
633                         reg_gfx_index.bitfields.sh_broadcast_writes);
634
635         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
636
637         status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
638
639         if (status != 0) {
640                 pr_err("Failed to allocate GART memory\n");
641                 return status;
642         }
643
644         packet_buff_uint = mem_obj->cpu_ptr;
645
646         memset(packet_buff_uint, 0, ib_size);
647
648         packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
649         packets_vec[0].header.count = 1;
650         packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
651         packets_vec[0].header.type = PM4_TYPE_3;
652         packets_vec[0].bitfields2.reg_offset =
653                         GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
654
655         packets_vec[0].bitfields2.insert_vmid = 0;
656         packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
657
658         packets_vec[1].header.count = 1;
659         packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
660         packets_vec[1].header.type = PM4_TYPE_3;
661         packets_vec[1].bitfields2.reg_offset = SQ_CMD / 4 - AMD_CONFIG_REG_BASE;
662
663         packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
664         packets_vec[1].bitfields2.insert_vmid = 1;
665         packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
666
667         /* Restore the GRBM_GFX_INDEX register */
668
669         reg_gfx_index.u32All = 0;
670         reg_gfx_index.bits.sh_broadcast_writes = 1;
671         reg_gfx_index.bits.instance_broadcast_writes = 1;
672         reg_gfx_index.bits.se_broadcast_writes = 1;
673
674
675         packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
676         packets_vec[2].bitfields2.reg_offset =
677                                 GRBM_GFX_INDEX / 4 - USERCONFIG_REG_BASE;
678
679         packets_vec[2].bitfields2.insert_vmid = 0;
680         packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
681
682         status = dbgdev_diq_submit_ib(
683                         dbgdev,
684                         wac_info->process->pasid,
685                         mem_obj->gpu_addr,
686                         packet_buff_uint,
687                         ib_size);
688
689         if (status)
690                 pr_err("Failed to submit IB to DIQ\n");
691
692         kfd_gtt_sa_free(dbgdev->dev, mem_obj);
693
694         return status;
695 }
696
697 static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
698                                         struct dbg_wave_control_info *wac_info)
699 {
700         int status;
701         union SQ_CMD_BITS reg_sq_cmd;
702         union GRBM_GFX_INDEX_BITS reg_gfx_index;
703         struct kfd_process_device *pdd;
704
705         reg_sq_cmd.u32All = 0;
706
707         /* taking the VMID for that process on the safe way using PDD */
708         pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
709
710         if (!pdd) {
711                 pr_err("Failed to get pdd for wave control no DIQ\n");
712                 return -EFAULT;
713         }
714         status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
715                                                         &reg_gfx_index);
716         if (status) {
717                 pr_err("Failed to set wave control registers\n");
718                 return status;
719         }
720
721         /* for non DIQ we need to patch the VMID: */
722
723         reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
724
725         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
726
727         pr_debug("\t\t mode      is: %u\n", wac_info->mode);
728         pr_debug("\t\t operand   is: %u\n", wac_info->operand);
729         pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
730         pr_debug("\t\t msg value is: %u\n",
731                         wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
732         pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
733
734         pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
735         pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
736         pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
737         pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
738         pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
739         pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
740         pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
741
742         pr_debug("\t\t ibw       is : %u\n",
743                         reg_gfx_index.bitfields.instance_broadcast_writes);
744         pr_debug("\t\t ii        is : %u\n",
745                         reg_gfx_index.bitfields.instance_index);
746         pr_debug("\t\t sebw      is : %u\n",
747                         reg_gfx_index.bitfields.se_broadcast_writes);
748         pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
749         pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
750         pr_debug("\t\t sbw       is : %u\n",
751                         reg_gfx_index.bitfields.sh_broadcast_writes);
752
753         pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
754
755         return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
756                                                         reg_gfx_index.u32All,
757                                                         reg_sq_cmd.u32All);
758 }
759
760 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
761 {
762         int status = 0;
763         unsigned int vmid;
764         uint16_t queried_pasid;
765         union SQ_CMD_BITS reg_sq_cmd;
766         union GRBM_GFX_INDEX_BITS reg_gfx_index;
767         struct kfd_process_device *pdd;
768         struct dbg_wave_control_info wac_info;
769         int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
770         int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
771
772         reg_sq_cmd.u32All = 0;
773         status = 0;
774
775         wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
776         wac_info.operand = HSA_DBG_WAVEOP_KILL;
777
778         pr_debug("Killing all process wavefronts\n");
779
780         /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
781          * ATC_VMID15_PASID_MAPPING
782          * to check which VMID the current process is mapped to.
783          */
784
785         for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
786                 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
787                                 (dev->kgd, vmid, &queried_pasid);
788
789                 if (status && queried_pasid == p->pasid) {
790                         pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
791                                         vmid, p->pasid);
792                         break;
793                 }
794         }
795
796         if (vmid > last_vmid_to_scan) {
797                 pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
798                 return -EFAULT;
799         }
800
801         /* taking the VMID for that process on the safe way using PDD */
802         pdd = kfd_get_process_device_data(dev, p);
803         if (!pdd)
804                 return -EFAULT;
805
806         status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
807                         &reg_gfx_index);
808         if (status != 0)
809                 return -EINVAL;
810
811         /* for non DIQ we need to patch the VMID: */
812         reg_sq_cmd.bits.vm_id = vmid;
813
814         dev->kfd2kgd->wave_control_execute(dev->kgd,
815                                         reg_gfx_index.u32All,
816                                         reg_sq_cmd.u32All);
817
818         return 0;
819 }
820
821 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
822                         enum DBGDEV_TYPE type)
823 {
824         pdbgdev->dev = pdev;
825         pdbgdev->kq = NULL;
826         pdbgdev->type = type;
827         pdbgdev->pqm = NULL;
828
829         switch (type) {
830         case DBGDEV_TYPE_NODIQ:
831                 pdbgdev->dbgdev_register = dbgdev_register_nodiq;
832                 pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
833                 pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
834                 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
835                 break;
836         case DBGDEV_TYPE_DIQ:
837         default:
838                 pdbgdev->dbgdev_register = dbgdev_register_diq;
839                 pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
840                 pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
841                 pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
842                 break;
843         }
844
845 }