drm/amdkfd: CRIU checkpoint and restore queue control stack
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdkfd / kfd_process_queue_manager.c
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/slab.h>
25 #include <linux/list.h>
26 #include "kfd_device_queue_manager.h"
27 #include "kfd_priv.h"
28 #include "kfd_kernel_queue.h"
29 #include "amdgpu_amdkfd.h"
30
31 static inline struct process_queue_node *get_queue_by_qid(
32                         struct process_queue_manager *pqm, unsigned int qid)
33 {
34         struct process_queue_node *pqn;
35
36         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
37                 if ((pqn->q && pqn->q->properties.queue_id == qid) ||
38                     (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
39                         return pqn;
40         }
41
42         return NULL;
43 }
44
45 static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
46                                     unsigned int qid)
47 {
48         if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
49                 return -EINVAL;
50
51         if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
52                 pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
53                 return -ENOSPC;
54         }
55
56         return 0;
57 }
58
59 static int find_available_queue_slot(struct process_queue_manager *pqm,
60                                         unsigned int *qid)
61 {
62         unsigned long found;
63
64         found = find_first_zero_bit(pqm->queue_slot_bitmap,
65                         KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
66
67         pr_debug("The new slot id %lu\n", found);
68
69         if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
70                 pr_info("Cannot open more queues for process with pasid 0x%x\n",
71                                 pqm->process->pasid);
72                 return -ENOMEM;
73         }
74
75         set_bit(found, pqm->queue_slot_bitmap);
76         *qid = found;
77
78         return 0;
79 }
80
81 void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
82 {
83         struct kfd_dev *dev = pdd->dev;
84
85         if (pdd->already_dequeued)
86                 return;
87
88         dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
89         pdd->already_dequeued = true;
90 }
91
/**
 * pqm_set_gws - Attach a GWS (global wave sync) allocation to, or detach
 * it from, the user queue identified by @qid.
 * @pqm: process queue manager of the calling process
 * @qid: queue id of a user queue owned by this process
 * @gws: GWS object to attach, or NULL to detach the currently attached one
 *
 * Only one queue per process may have GWS assigned at a time.  The new
 * GWS state is pushed to the hardware scheduler through update_queue.
 *
 * Returns 0 on success or a negative errno.
 */
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
			void *gws)
{
	struct kfd_dev *dev = NULL;
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct kgd_mem *mem = NULL;
	int ret;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	/* GWS applies to user queues only; a DIQ node (pqn->q == NULL)
	 * leaves dev NULL and is rejected by the WARN below.
	 */
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -EINVAL;
	}

	/* Only allow one queue per process can have GWS assigned */
	if (gws && pdd->qpd.num_gws)
		return -EBUSY;

	/* Detaching when nothing is attached is an error */
	if (!gws && pdd->qpd.num_gws == 0)
		return -EINVAL;

	if (gws)
		ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
			gws, &mem);
	else
		ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
			pqn->q->gws);
	if (unlikely(ret))
		return ret;

	/* On detach, mem is still NULL, which clears the queue's gws */
	pqn->q->gws = mem;
	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;

	/* Re-map the queue so the scheduler picks up the new GWS state */
	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
							pqn->q, NULL);
}
140
141 void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
142 {
143         int i;
144
145         for (i = 0; i < p->n_pdds; i++)
146                 kfd_process_dequeue_from_device(p->pdds[i]);
147 }
148
149 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
150 {
151         INIT_LIST_HEAD(&pqm->queues);
152         pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
153                                                GFP_KERNEL);
154         if (!pqm->queue_slot_bitmap)
155                 return -ENOMEM;
156         pqm->process = p;
157
158         return 0;
159 }
160
/*
 * pqm_uninit - Tear down the process queue manager on process exit.
 *
 * For each remaining queue node: release any GWS still attached, remove
 * the queue's procfs entry, free the queue object, then unlink and free
 * the node.  Finally the qid bitmap is freed and cleared.
 *
 * NOTE(review): for a DIQ node pqn->q is NULL, so this relies on
 * kfd_procfs_del_queue()/uninit_queue() tolerating a NULL queue --
 * confirm against their definitions.
 */
void pqm_uninit(struct process_queue_manager *pqm)
{
	struct process_queue_node *pqn, *next;

	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
		if (pqn->q && pqn->q->gws)
			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
				pqn->q->gws);
		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
	}

	bitmap_free(pqm->queue_slot_bitmap);
	pqm->queue_slot_bitmap = NULL;
}
178
179 static int init_user_queue(struct process_queue_manager *pqm,
180                                 struct kfd_dev *dev, struct queue **q,
181                                 struct queue_properties *q_properties,
182                                 struct file *f, unsigned int qid)
183 {
184         int retval;
185
186         /* Doorbell initialized in user space*/
187         q_properties->doorbell_ptr = NULL;
188
189         /* let DQM handle it*/
190         q_properties->vmid = 0;
191         q_properties->queue_id = qid;
192
193         retval = init_queue(q, q_properties);
194         if (retval != 0)
195                 return retval;
196
197         (*q)->device = dev;
198         (*q)->process = pqm->process;
199
200         pr_debug("PQM After init queue");
201
202         return retval;
203 }
204
/**
 * pqm_create_queue - Create a queue for a process on a device and
 * register it with the device queue manager (DQM).
 * @pqm: process queue manager of the calling process
 * @dev: target device
 * @f: file pointer, forwarded to init_user_queue (unused there)
 * @properties: requested queue properties; properties->type selects
 *		the creation path (compute, SDMA or DIQ)
 * @qid: out parameter, receives the assigned queue id
 * @q_data: CRIU private data when restoring a checkpointed queue
 *	    (forces the restored queue id), NULL for a new queue
 * @restore_mqd: checkpointed MQD contents to restore, or NULL
 * @restore_ctl_stack: checkpointed control stack to restore, or NULL
 * @p_doorbell_offset_in_process: if non-NULL, receives the doorbell
 *	    byte offset within the process doorbell pages
 *
 * Returns 0 on success or a negative errno (-1 when the per-device
 * process data is missing).
 */
int pqm_create_queue(struct process_queue_manager *pqm,
			    struct kfd_dev *dev,
			    struct file *f,
			    struct queue_properties *properties,
			    unsigned int *qid,
			    const struct kfd_criu_queue_priv_data *q_data,
			    const void *restore_mqd,
			    const void *restore_ctl_stack,
			    uint32_t *p_doorbell_offset_in_process)
{
	int retval;
	struct kfd_process_device *pdd;
	struct queue *q;
	struct process_queue_node *pqn;
	struct kernel_queue *kq;
	enum kfd_queue_type type = properties->type;
	unsigned int max_queues = 127; /* HWS limit */

	q = NULL;
	kq = NULL;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	/*
	 * for debug process, verify that it is within the static queues limit
	 * currently limit is set to half of the total avail HQD slots
	 * If we are just about to create DIQ, the is_debug flag is not set yet
	 * Hence we also check the type as well
	 */
	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
		max_queues = dev->device_info.max_no_of_hqd/2;

	if (pdd->qpd.queue_count >= max_queues)
		return -ENOSPC;

	/* CRIU restore must reuse the checkpointed queue id */
	if (q_data) {
		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
		*qid = q_data->q_id;
	} else
		retval = find_available_queue_slot(pqm, qid);

	if (retval != 0)
		return retval;

	/* First queue on this device: register the process with the DQM */
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);

	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
	if (!pqn) {
		retval = -ENOMEM;
		goto err_allocate_pqn;
	}

	switch (type) {
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether a SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;

	case KFD_QUEUE_TYPE_COMPUTE:
		/* check if there is over subscription */
		if ((dev->dqm->sched_policy ==
		     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
		((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
		(dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
			retval = -EPERM;
			goto err_create_queue;
		}

		retval = init_user_queue(pqm, dev, &q, properties, f, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;
	case KFD_QUEUE_TYPE_DIQ:
		/* Debug interface queue: kernel queue, pqn->q stays NULL */
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
		if (!kq) {
			retval = -ENOMEM;
			goto err_create_queue;
		}
		kq->queue->properties.queue_id = *qid;
		pqn->kq = kq;
		pqn->q = NULL;
		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
							kq, &pdd->qpd);
		break;
	default:
		WARN(1, "Invalid queue type %d", type);
		retval = -EINVAL;
	}

	if (retval != 0) {
		pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
			pqm->process->pasid, type, retval);
		goto err_create_queue;
	}

	if (q && p_doorbell_offset_in_process)
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 * There are always 1024 doorbells per process, so in case
		 * of 8-byte doorbells, there are two doorbell pages per
		 * process.
		 */
		*p_doorbell_offset_in_process =
			(q->properties.doorbell_off * sizeof(uint32_t)) &
			(kfd_doorbell_process_slice(dev) - 1);

	pr_debug("PQM After DQM create queue\n");

	list_add(&pqn->process_queue_list, &pqm->queues);

	if (q) {
		pr_debug("PQM done creating queue\n");
		kfd_procfs_add_queue(q);
		print_queue_properties(&q->properties);
	}

	return retval;

err_create_queue:
	/* NOTE(review): uninit_queue() is called with q possibly NULL on
	 * the DIQ path -- presumably it tolerates NULL; confirm.
	 */
	uninit_queue(q);
	if (kq)
		kernel_queue_uninit(kq, false);
	kfree(pqn);
err_allocate_pqn:
	/* check if queues list is empty unregister process from device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
	return retval;
}
362
/**
 * pqm_destroy_queue - Destroy the queue with id @qid and release its
 * resources (DQM entry, GWS, procfs node, qid slot).
 * @pqm: process queue manager of the calling process
 * @qid: id of the queue to destroy (user queue or DIQ)
 *
 * If the DQM destroy fails with anything other than -ETIME, the node is
 * deliberately left in place (err_destroy_queue) so the queue can still
 * be found; on -ETIME teardown continues anyway.
 *
 * Returns 0 on success or a negative errno (-1 when the per-device
 * process data is missing).
 */
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct device_queue_manager *dqm;
	struct kfd_dev *dev;
	int retval;

	dqm = NULL;

	retval = 0;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	/* Resolve the owning device from whichever queue kind is set */
	dev = NULL;
	if (pqn->kq)
		dev = pqn->kq->dev;
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	if (pqn->kq) {
		/* destroy kernel queue (DIQ) */
		dqm = pqn->kq->dev->dqm;
		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
		kernel_queue_uninit(pqn->kq, false);
	}

	if (pqn->q) {
		kfd_procfs_del_queue(pqn->q);
		dqm = pqn->q->device->dqm;
		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
		if (retval) {
			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
				pqm->process->pasid,
				pqn->q->properties.queue_id, retval);
			/* -ETIME (HWS timeout): keep tearing down anyway */
			if (retval != -ETIME)
				goto err_destroy_queue;
		}

		if (pqn->q->gws) {
			amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info,
				pqn->q->gws);
			pdd->qpd.num_gws = 0;
		}

		uninit_queue(pqn->q);
	}

	list_del(&pqn->process_queue_list);
	kfree(pqn);
	clear_bit(qid, pqm->queue_slot_bitmap);

	/* Last queue gone: unregister the process from the device */
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dqm->ops.unregister_process(dqm, &pdd->qpd);

err_destroy_queue:
	return retval;
}
434
435 int pqm_update_queue_properties(struct process_queue_manager *pqm,
436                                 unsigned int qid, struct queue_properties *p)
437 {
438         int retval;
439         struct process_queue_node *pqn;
440
441         pqn = get_queue_by_qid(pqm, qid);
442         if (!pqn) {
443                 pr_debug("No queue %d exists for update operation\n", qid);
444                 return -EFAULT;
445         }
446
447         pqn->q->properties.queue_address = p->queue_address;
448         pqn->q->properties.queue_size = p->queue_size;
449         pqn->q->properties.queue_percent = p->queue_percent;
450         pqn->q->properties.priority = p->priority;
451
452         retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
453                                                         pqn->q, NULL);
454         if (retval != 0)
455                 return retval;
456
457         return 0;
458 }
459
460 int pqm_update_mqd(struct process_queue_manager *pqm,
461                                 unsigned int qid, struct mqd_update_info *minfo)
462 {
463         int retval;
464         struct process_queue_node *pqn;
465
466         pqn = get_queue_by_qid(pqm, qid);
467         if (!pqn) {
468                 pr_debug("No queue %d exists for update operation\n", qid);
469                 return -EFAULT;
470         }
471
472         retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
473                                                         pqn->q, minfo);
474         if (retval != 0)
475                 return retval;
476
477         return 0;
478 }
479
480 struct kernel_queue *pqm_get_kernel_queue(
481                                         struct process_queue_manager *pqm,
482                                         unsigned int qid)
483 {
484         struct process_queue_node *pqn;
485
486         pqn = get_queue_by_qid(pqm, qid);
487         if (pqn && pqn->kq)
488                 return pqn->kq;
489
490         return NULL;
491 }
492
493 struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
494                                         unsigned int qid)
495 {
496         struct process_queue_node *pqn;
497
498         pqn = get_queue_by_qid(pqm, qid);
499         return pqn ? pqn->q : NULL;
500 }
501
502 int pqm_get_wave_state(struct process_queue_manager *pqm,
503                        unsigned int qid,
504                        void __user *ctl_stack,
505                        u32 *ctl_stack_used_size,
506                        u32 *save_area_used_size)
507 {
508         struct process_queue_node *pqn;
509
510         pqn = get_queue_by_qid(pqm, qid);
511         if (!pqn) {
512                 pr_debug("amdkfd: No queue %d exists for operation\n",
513                          qid);
514                 return -EFAULT;
515         }
516
517         return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
518                                                        pqn->q,
519                                                        ctl_stack,
520                                                        ctl_stack_used_size,
521                                                        save_area_used_size);
522 }
523
524 static int get_queue_data_sizes(struct kfd_process_device *pdd,
525                                 struct queue *q,
526                                 uint32_t *mqd_size,
527                                 uint32_t *ctl_stack_size)
528 {
529         int ret;
530
531         ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
532                                             q->properties.queue_id,
533                                             mqd_size,
534                                             ctl_stack_size);
535         if (ret)
536                 pr_err("Failed to get queue dump info (%d)\n", ret);
537
538         return ret;
539 }
540
541 int kfd_process_get_queue_info(struct kfd_process *p,
542                                uint32_t *num_queues,
543                                uint64_t *priv_data_sizes)
544 {
545         uint32_t extra_data_sizes = 0;
546         struct queue *q;
547         int i;
548         int ret;
549
550         *num_queues = 0;
551
552         /* Run over all PDDs of the process */
553         for (i = 0; i < p->n_pdds; i++) {
554                 struct kfd_process_device *pdd = p->pdds[i];
555
556                 list_for_each_entry(q, &pdd->qpd.queues_list, list) {
557                         if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
558                                 q->properties.type == KFD_QUEUE_TYPE_SDMA ||
559                                 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
560                                 uint32_t mqd_size, ctl_stack_size;
561
562                                 *num_queues = *num_queues + 1;
563
564                                 ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
565                                 if (ret)
566                                         return ret;
567
568                                 extra_data_sizes += mqd_size + ctl_stack_size;
569                         } else {
570                                 pr_err("Unsupported queue type (%d)\n", q->properties.type);
571                                 return -EOPNOTSUPP;
572                         }
573                 }
574         }
575         *priv_data_sizes = extra_data_sizes +
576                                 (*num_queues * sizeof(struct kfd_criu_queue_priv_data));
577
578         return 0;
579 }
580
581 static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
582                               unsigned int qid,
583                               void *mqd,
584                               void *ctl_stack)
585 {
586         struct process_queue_node *pqn;
587
588         pqn = get_queue_by_qid(pqm, qid);
589         if (!pqn) {
590                 pr_debug("amdkfd: No queue %d exists for operation\n", qid);
591                 return -EFAULT;
592         }
593
594         if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
595                 pr_err("amdkfd: queue dumping not supported on this device\n");
596                 return -EOPNOTSUPP;
597         }
598
599         return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
600                                                        pqn->q, mqd, ctl_stack);
601 }
602
/*
 * criu_checkpoint_queue - Fill one queue's CRIU private-data record.
 * @pdd: per-device process data owning the queue
 * @q: queue being checkpointed
 * @q_data: destination record; the MQD and control stack are written
 *	    immediately after it in the same buffer (caller sized it as
 *	    sizeof(*q_data) + mqd_size + ctl_stack_size and pre-set
 *	    q_data->mqd_size / q_data->ctl_stack_size)
 *
 * Returns 0 on success or the errno from pqm_checkpoint_mqd().
 */
static int criu_checkpoint_queue(struct kfd_process_device *pdd,
			   struct queue *q,
			   struct kfd_criu_queue_priv_data *q_data)
{
	uint8_t *mqd, *ctl_stack;
	int ret;

	/* Layout: q_data header, then MQD, then control stack */
	mqd = (void *)(q_data + 1);
	ctl_stack = mqd + q_data->mqd_size;

	q_data->gpu_id = pdd->dev->id;
	q_data->type = q->properties.type;
	q_data->format = q->properties.format;
	q_data->q_id =  q->properties.queue_id;
	q_data->q_address = q->properties.queue_address;
	q_data->q_size = q->properties.queue_size;
	q_data->priority = q->properties.priority;
	q_data->q_percent = q->properties.queue_percent;
	/* Pointers are saved as raw addresses for restore in the same VA space */
	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
	q_data->doorbell_id = q->doorbell_id;

	q_data->sdma_id = q->sdma_id;

	q_data->eop_ring_buffer_address =
		q->properties.eop_ring_buffer_address;

	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;

	q_data->ctx_save_restore_area_address =
		q->properties.ctx_save_restore_area_address;

	q_data->ctx_save_restore_area_size =
		q->properties.ctx_save_restore_area_size;

	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
	if (ret) {
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
		return ret;
	}

	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
	return ret;
}
647
/*
 * criu_checkpoint_queues_device - Checkpoint every queue of one device
 * and append the records to the user-space private-data blob.
 * @pdd: per-device process data whose queues are dumped
 * @user_priv: base of the user-space private-data buffer
 * @q_index: in/out, running count of queues dumped so far
 * @queues_priv_data_offset: in/out, write offset into @user_priv
 *
 * A single kernel buffer is reused across queues and grown only when a
 * queue needs a larger record (header + MQD + control stack).
 *
 * Returns 0 on success or a negative errno.
 */
static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
				   uint8_t __user *user_priv,
				   unsigned int *q_index,
				   uint64_t *queues_priv_data_offset)
{
	unsigned int q_private_data_size = 0;
	uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
	struct queue *q;
	int ret = 0;

	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
		struct kfd_criu_queue_priv_data *q_data;
		uint64_t q_data_size;
		uint32_t mqd_size;
		uint32_t ctl_stack_size;

		/* Only compute and SDMA queues can be checkpointed */
		if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
			q->properties.type != KFD_QUEUE_TYPE_SDMA &&
			q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {

			pr_err("Unsupported queue type (%d)\n", q->properties.type);
			ret = -EOPNOTSUPP;
			break;
		}

		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
		if (ret)
			break;

		q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;

		/* Increase local buffer space if needed */
		if (q_private_data_size < q_data_size) {
			kfree(q_private_data);

			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
			if (!q_private_data) {
				ret = -ENOMEM;
				break;
			}
			q_private_data_size = q_data_size;
		}

		q_data = (struct kfd_criu_queue_priv_data *)q_private_data;

		/* data stored in this order: priv_data, mqd, ctl_stack */
		q_data->mqd_size = mqd_size;
		q_data->ctl_stack_size = ctl_stack_size;

		ret = criu_checkpoint_queue(pdd, q, q_data);
		if (ret)
			break;

		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;

		ret = copy_to_user(user_priv + *queues_priv_data_offset,
				q_data, q_data_size);
		if (ret) {
			ret = -EFAULT;
			break;
		}
		*queues_priv_data_offset += q_data_size;
		*q_index = *q_index + 1;
	}

	kfree(q_private_data);

	return ret;
}
717
718 int kfd_criu_checkpoint_queues(struct kfd_process *p,
719                          uint8_t __user *user_priv_data,
720                          uint64_t *priv_data_offset)
721 {
722         int ret = 0, pdd_index, q_index = 0;
723
724         for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
725                 struct kfd_process_device *pdd = p->pdds[pdd_index];
726
727                 /*
728                  * criu_checkpoint_queues_device will copy data to user and update q_index and
729                  * queues_priv_data_offset
730                  */
731                 ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
732                                               priv_data_offset);
733
734                 if (ret)
735                         break;
736         }
737
738         return ret;
739 }
740
741 static void set_queue_properties_from_criu(struct queue_properties *qp,
742                                           struct kfd_criu_queue_priv_data *q_data)
743 {
744         qp->is_interop = false;
745         qp->is_gws = q_data->is_gws;
746         qp->queue_percent = q_data->q_percent;
747         qp->priority = q_data->priority;
748         qp->queue_address = q_data->q_address;
749         qp->queue_size = q_data->q_size;
750         qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
751         qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
752         qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
753         qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
754         qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
755         qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
756         qp->ctl_stack_size = q_data->ctl_stack_size;
757         qp->type = q_data->type;
758         qp->format = q_data->format;
759 }
760
761 int kfd_criu_restore_queue(struct kfd_process *p,
762                            uint8_t __user *user_priv_ptr,
763                            uint64_t *priv_data_offset,
764                            uint64_t max_priv_data_size)
765 {
766         uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
767         struct kfd_criu_queue_priv_data *q_data;
768         struct kfd_process_device *pdd;
769         uint64_t q_extra_data_size;
770         struct queue_properties qp;
771         unsigned int queue_id;
772         struct kfd_dev *dev;
773         int ret = 0;
774
775         if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
776                 return -EINVAL;
777
778         q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
779         if (!q_data)
780                 return -ENOMEM;
781
782         ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
783         if (ret) {
784                 ret = -EFAULT;
785                 goto exit;
786         }
787
788         *priv_data_offset += sizeof(*q_data);
789         q_extra_data_size = q_data->ctl_stack_size + q_data->mqd_size;
790
791         if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
792                 ret = -EINVAL;
793                 goto exit;
794         }
795
796         q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
797         if (!q_extra_data) {
798                 ret = -ENOMEM;
799                 goto exit;
800         }
801
802         ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
803         if (ret) {
804                 ret = -EFAULT;
805                 goto exit;
806         }
807
808         *priv_data_offset += q_extra_data_size;
809
810         dev = kfd_device_by_id(q_data->gpu_id);
811         if (!dev) {
812                 pr_err("Could not get kfd_dev from gpu_id = 0x%x\n",
813                 q_data->gpu_id);
814
815                 ret = -EINVAL;
816                 goto exit;
817         }
818
819         pdd = kfd_get_process_device_data(dev, p);
820         if (!pdd) {
821                 pr_err("Failed to get pdd\n");
822                 ret = -EFAULT;
823                 return ret;
824         }
825         /* data stored in this order: mqd, ctl_stack */
826         mqd = q_extra_data;
827         ctl_stack = mqd + q_data->mqd_size;
828
829         memset(&qp, 0, sizeof(qp));
830         set_queue_properties_from_criu(&qp, q_data);
831
832         print_queue_properties(&qp);
833
834         ret = pqm_create_queue(&p->pqm, pdd->dev, NULL, &qp, &queue_id, q_data, mqd, ctl_stack,
835                                 NULL);
836         if (ret) {
837                 pr_err("Failed to create new queue err:%d\n", ret);
838                 ret = -EINVAL;
839         }
840
841 exit:
842         if (ret)
843                 pr_err("Failed to create queue (%d)\n", ret);
844         else
845                 pr_debug("Queue id %d was restored successfully\n", queue_id);
846
847         kfree(q_data);
848
849         return ret;
850 }
851
852 int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
853                                   unsigned int qid,
854                                   uint32_t *mqd_size,
855                                   uint32_t *ctl_stack_size)
856 {
857         struct process_queue_node *pqn;
858
859         pqn = get_queue_by_qid(pqm, qid);
860         if (!pqn) {
861                 pr_debug("amdkfd: No queue %d exists for operation\n", qid);
862                 return -EFAULT;
863         }
864
865         if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
866                 pr_err("amdkfd: queue dumping not supported on this device\n");
867                 return -EOPNOTSUPP;
868         }
869
870         pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
871                                                        pqn->q, mqd_size,
872                                                        ctl_stack_size);
873         return 0;
874 }
875
876 #if defined(CONFIG_DEBUG_FS)
877
878 int pqm_debugfs_mqds(struct seq_file *m, void *data)
879 {
880         struct process_queue_manager *pqm = data;
881         struct process_queue_node *pqn;
882         struct queue *q;
883         enum KFD_MQD_TYPE mqd_type;
884         struct mqd_manager *mqd_mgr;
885         int r = 0;
886
887         list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
888                 if (pqn->q) {
889                         q = pqn->q;
890                         switch (q->properties.type) {
891                         case KFD_QUEUE_TYPE_SDMA:
892                         case KFD_QUEUE_TYPE_SDMA_XGMI:
893                                 seq_printf(m, "  SDMA queue on device %x\n",
894                                            q->device->id);
895                                 mqd_type = KFD_MQD_TYPE_SDMA;
896                                 break;
897                         case KFD_QUEUE_TYPE_COMPUTE:
898                                 seq_printf(m, "  Compute queue on device %x\n",
899                                            q->device->id);
900                                 mqd_type = KFD_MQD_TYPE_CP;
901                                 break;
902                         default:
903                                 seq_printf(m,
904                                 "  Bad user queue type %d on device %x\n",
905                                            q->properties.type, q->device->id);
906                                 continue;
907                         }
908                         mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
909                 } else if (pqn->kq) {
910                         q = pqn->kq->queue;
911                         mqd_mgr = pqn->kq->mqd_mgr;
912                         switch (q->properties.type) {
913                         case KFD_QUEUE_TYPE_DIQ:
914                                 seq_printf(m, "  DIQ on device %x\n",
915                                            pqn->kq->dev->id);
916                                 break;
917                         default:
918                                 seq_printf(m,
919                                 "  Bad kernel queue type %d on device %x\n",
920                                            q->properties.type,
921                                            pqn->kq->dev->id);
922                                 continue;
923                         }
924                 } else {
925                         seq_printf(m,
926                 "  Weird: Queue node with neither kernel nor user queue\n");
927                         continue;
928                 }
929
930                 r = mqd_mgr->debugfs_show_mqd(m, q->mqd);
931                 if (r != 0)
932                         break;
933         }
934
935         return r;
936 }
937
938 #endif