// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2019 Intel Corporation. All rights rsvd. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/aer.h>
#include <linux/fs.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/idr.h>
#include <linux/intel-svm.h>
#include <linux/iommu.h>
#include <uapi/linux/idxd.h>
#include <linux/dmaengine.h>
#include "../dmaengine.h"
#include "registers.h"
#include "idxd.h"
#include "perfmon.h"

MODULE_VERSION(IDXD_DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");

static bool sva = true;
module_param(sva, bool, 0644);
MODULE_PARM_DESC(sva, "Toggle SVA support on/off");

#define DRV_NAME "idxd"

bool support_enqcmd;
DEFINE_IDA(idxd_ida);

static struct idxd_driver_data idxd_driver_data[] = {
        [IDXD_TYPE_DSA] = {
                .name_prefix = "dsa",
                .type = IDXD_TYPE_DSA,
                .compl_size = sizeof(struct dsa_completion_record),
                .align = 32,
                .dev_type = &dsa_device_type,
        },
        [IDXD_TYPE_IAX] = {
                .name_prefix = "iax",
                .type = IDXD_TYPE_IAX,
                .compl_size = sizeof(struct iax_completion_record),
                .align = 64,
                .dev_type = &iax_device_type,
        },
};

static struct pci_device_id idxd_pci_tbl[] = {
        /* DSA ver 1.0 platforms */
        { PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) },

        /* IAX ver 1.0 platforms */
        { PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) },
        { 0, }
};
MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);

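/*
 * Allocate MSI-X vectors and hook up the misc interrupt handler on vector 0
 * and a work queue completion handler on each remaining vector. When the
 * device supports it, also request an interrupt handle for each wq vector.
 */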
static int idxd_setup_interrupts(struct idxd_device *idxd)
{
        struct pci_dev *pdev = idxd->pdev;
        struct device *dev = &pdev->dev;
        struct idxd_irq_entry *irq_entry;
        int i, msixcnt;
        int rc = 0;

        msixcnt = pci_msix_vec_count(pdev);
        if (msixcnt < 0) {
                dev_err(dev, "Not MSI-X interrupt capable.\n");
                return -ENOSPC;
        }

        rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
        if (rc != msixcnt) {
                dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc);
                return -ENOSPC;
        }
        dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);

        /*
         * We implement 1 completion list per MSI-X entry except for
         * entry 0, which is for errors and others.
         */
        idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
                                         GFP_KERNEL, dev_to_node(dev));
        if (!idxd->irq_entries) {
                rc = -ENOMEM;
                goto err_irq_entries;
        }

        for (i = 0; i < msixcnt; i++) {
                idxd->irq_entries[i].id = i;
                idxd->irq_entries[i].idxd = idxd;
                idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
                spin_lock_init(&idxd->irq_entries[i].list_lock);
        }

        irq_entry = &idxd->irq_entries[0];
        rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
                                  0, "idxd-misc", irq_entry);
        if (rc < 0) {
                dev_err(dev, "Failed to allocate misc interrupt.\n");
                goto err_misc_irq;
        }

        dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);

        /* first MSI-X entry is not for wq interrupts */
        idxd->num_wq_irqs = msixcnt - 1;

        for (i = 1; i < msixcnt; i++) {
                irq_entry = &idxd->irq_entries[i];

                init_llist_head(&idxd->irq_entries[i].pending_llist);
                INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
                rc = request_threaded_irq(irq_entry->vector, NULL,
                                          idxd_wq_thread, 0, "idxd-portal", irq_entry);
                if (rc < 0) {
                        dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
                        goto err_wq_irqs;
                }

                dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
                if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
                        /*
                         * The MSIX vector enumeration starts at 1 with vector 0 being the
                         * misc interrupt that handles non I/O completion events. The
                         * interrupt handles are for IMS enumeration on guest. The misc
                         * interrupt vector does not require a handle and therefore we start
                         * the int_handles at index 0. Since 'i' starts at 1, the first
                         * int_handles index will be 0.
                         */
                        rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
                                                            IDXD_IRQ_MSIX);
                        if (rc < 0) {
                                free_irq(irq_entry->vector, irq_entry);
                                goto err_wq_irqs;
                        }
                        dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
                }
        }

        idxd_unmask_error_interrupts(idxd);
        idxd_msix_perm_setup(idxd);
        return 0;

 err_wq_irqs:
        while (--i >= 0) {
                irq_entry = &idxd->irq_entries[i];
                free_irq(irq_entry->vector, irq_entry);
                if (i != 0)
                        idxd_device_release_int_handle(idxd,
                                                       idxd->int_handles[i], IDXD_IRQ_MSIX);
        }
 err_misc_irq:
        /* Disable error interrupt generation */
        idxd_mask_error_interrupts(idxd);
 err_irq_entries:
        pci_free_irq_vectors(pdev);
        dev_err(dev, "No usable interrupts\n");
        return rc;
}

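/*
 * Undo idxd_setup_interrupts(): free the misc and per-wq interrupts, release
 * any interrupt handles the device handed out, and free the MSI-X vectors.
 */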
static void idxd_cleanup_interrupts(struct idxd_device *idxd)
{
        struct pci_dev *pdev = idxd->pdev;
        struct idxd_irq_entry *irq_entry;
        int i, msixcnt;

        msixcnt = pci_msix_vec_count(pdev);
        if (msixcnt <= 0)
                return;

        irq_entry = &idxd->irq_entries[0];
        free_irq(irq_entry->vector, irq_entry);

        for (i = 1; i < msixcnt; i++) {

                irq_entry = &idxd->irq_entries[i];
                if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
                        idxd_device_release_int_handle(idxd, idxd->int_handles[i],
                                                       IDXD_IRQ_MSIX);
                free_irq(irq_entry->vector, irq_entry);
        }

        idxd_mask_error_interrupts(idxd);
        pci_free_irq_vectors(pdev);
}

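/*
 * Allocate a software wq descriptor for each work queue the device reports
 * and initialize its conf_dev as a child of the idxd device on the dsa bus.
 */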
static int idxd_setup_wqs(struct idxd_device *idxd)
{
        struct device *dev = &idxd->pdev->dev;
        struct idxd_wq *wq;
        int i, rc;

        idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
                                 GFP_KERNEL, dev_to_node(dev));
        if (!idxd->wqs)
                return -ENOMEM;

        for (i = 0; i < idxd->max_wqs; i++) {
                wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
                if (!wq) {
                        rc = -ENOMEM;
                        goto err;
                }

                wq->id = i;
                wq->idxd = idxd;
                device_initialize(&wq->conf_dev);
                wq->conf_dev.parent = &idxd->conf_dev;
                wq->conf_dev.bus = &dsa_bus_type;
                wq->conf_dev.type = &idxd_wq_device_type;
                rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
                if (rc < 0) {
                        put_device(&wq->conf_dev);
                        goto err;
                }

                mutex_init(&wq->wq_lock);
                init_waitqueue_head(&wq->err_queue);
                init_completion(&wq->wq_dead);
                wq->max_xfer_bytes = idxd->max_xfer_bytes;
                wq->max_batch_size = idxd->max_batch_size;
                wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
                if (!wq->wqcfg) {
                        put_device(&wq->conf_dev);
                        rc = -ENOMEM;
                        goto err;
                }
                idxd->wqs[i] = wq;
        }

        return 0;

 err:
        while (--i >= 0)
                put_device(&idxd->wqs[i]->conf_dev);
        return rc;
}

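/* Allocate and initialize a conf_dev-backed descriptor for each hardware engine. */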
static int idxd_setup_engines(struct idxd_device *idxd)
{
        struct idxd_engine *engine;
        struct device *dev = &idxd->pdev->dev;
        int i, rc;

        idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
                                     GFP_KERNEL, dev_to_node(dev));
        if (!idxd->engines)
                return -ENOMEM;

        for (i = 0; i < idxd->max_engines; i++) {
                engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev));
                if (!engine) {
                        rc = -ENOMEM;
                        goto err;
                }

                engine->id = i;
                engine->idxd = idxd;
                device_initialize(&engine->conf_dev);
                engine->conf_dev.parent = &idxd->conf_dev;
                engine->conf_dev.bus = &dsa_bus_type;
                engine->conf_dev.type = &idxd_engine_device_type;
                rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
                if (rc < 0) {
                        put_device(&engine->conf_dev);
                        goto err;
                }

                idxd->engines[i] = engine;
        }

        return 0;

 err:
        while (--i >= 0)
                put_device(&idxd->engines[i]->conf_dev);
        return rc;
}

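/*
 * Allocate and initialize a descriptor for each group; the traffic class
 * fields default to -1 (unassigned) until the group is configured.
 */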
static int idxd_setup_groups(struct idxd_device *idxd)
{
        struct device *dev = &idxd->pdev->dev;
        struct idxd_group *group;
        int i, rc;

        idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *),
                                    GFP_KERNEL, dev_to_node(dev));
        if (!idxd->groups)
                return -ENOMEM;

        for (i = 0; i < idxd->max_groups; i++) {
                group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev));
                if (!group) {
                        rc = -ENOMEM;
                        goto err;
                }

                group->id = i;
                group->idxd = idxd;
                device_initialize(&group->conf_dev);
                group->conf_dev.parent = &idxd->conf_dev;
                group->conf_dev.bus = &dsa_bus_type;
                group->conf_dev.type = &idxd_group_device_type;
                rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id);
                if (rc < 0) {
                        put_device(&group->conf_dev);
                        goto err;
                }

                idxd->groups[i] = group;
                group->tc_a = -1;
                group->tc_b = -1;
        }

        return 0;

 err:
        while (--i >= 0)
                put_device(&idxd->groups[i]->conf_dev);
        return rc;
}

static void idxd_cleanup_internals(struct idxd_device *idxd)
{
        int i;

        for (i = 0; i < idxd->max_groups; i++)
                put_device(&idxd->groups[i]->conf_dev);
        for (i = 0; i < idxd->max_engines; i++)
                put_device(&idxd->engines[i]->conf_dev);
        for (i = 0; i < idxd->max_wqs; i++)
                put_device(&idxd->wqs[i]->conf_dev);
        destroy_workqueue(idxd->wq);
}

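/*
 * Set up all software state derived from the capability registers: the
 * interrupt handle table (when supported), work queues, engines, groups,
 * and a driver workqueue named after the device.
 */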
static int idxd_setup_internals(struct idxd_device *idxd)
{
        struct device *dev = &idxd->pdev->dev;
        int rc, i;

        init_waitqueue_head(&idxd->cmd_waitq);

        if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
                idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL,
                                                 dev_to_node(dev));
                if (!idxd->int_handles)
                        return -ENOMEM;
        }

        rc = idxd_setup_wqs(idxd);
        if (rc < 0)
                goto err_wqs;

        rc = idxd_setup_engines(idxd);
        if (rc < 0)
                goto err_engine;

        rc = idxd_setup_groups(idxd);
        if (rc < 0)
                goto err_group;

        idxd->wq = create_workqueue(dev_name(dev));
        if (!idxd->wq) {
                rc = -ENOMEM;
                goto err_wkq_create;
        }

        return 0;

 err_wkq_create:
        for (i = 0; i < idxd->max_groups; i++)
                put_device(&idxd->groups[i]->conf_dev);
 err_group:
        for (i = 0; i < idxd->max_engines; i++)
                put_device(&idxd->engines[i]->conf_dev);
 err_engine:
        for (i = 0; i < idxd->max_wqs; i++)
                put_device(&idxd->wqs[i]->conf_dev);
 err_wqs:
        kfree(idxd->int_handles);
        return rc;
}

static void idxd_read_table_offsets(struct idxd_device *idxd)
{
        union offsets_reg offsets;
        struct device *dev = &idxd->pdev->dev;

        offsets.bits[0] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET);
        offsets.bits[1] = ioread64(idxd->reg_base + IDXD_TABLE_OFFSET + sizeof(u64));
        idxd->grpcfg_offset = offsets.grpcfg * IDXD_TABLE_MULT;
        dev_dbg(dev, "IDXD Group Config Offset: %#x\n", idxd->grpcfg_offset);
        idxd->wqcfg_offset = offsets.wqcfg * IDXD_TABLE_MULT;
        dev_dbg(dev, "IDXD Work Queue Config Offset: %#x\n", idxd->wqcfg_offset);
        idxd->msix_perm_offset = offsets.msix_perm * IDXD_TABLE_MULT;
        dev_dbg(dev, "IDXD MSIX Permission Offset: %#x\n", idxd->msix_perm_offset);
        idxd->perfmon_offset = offsets.perfmon * IDXD_TABLE_MULT;
        dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
}

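/*
 * Snapshot the capability registers (GENCAP, CMDCAP, GRPCAP, ENGCAP, WQCAP,
 * OPCAP) and derive the driver's limits from them.
 */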
static void idxd_read_caps(struct idxd_device *idxd)
{
        struct device *dev = &idxd->pdev->dev;
        int i;

        /* reading generic capabilities */
        idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
        dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);

        if (idxd->hw.gen_cap.cmd_cap) {
                idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET);
                dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
        }

        idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
        dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
        idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
        dev_dbg(dev, "max batch size: %u\n", idxd->max_batch_size);
        if (idxd->hw.gen_cap.config_en)
                set_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags);

        /* reading group capabilities */
        idxd->hw.group_cap.bits =
                ioread64(idxd->reg_base + IDXD_GRPCAP_OFFSET);
        dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
        idxd->max_groups = idxd->hw.group_cap.num_groups;
        dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
        idxd->max_tokens = idxd->hw.group_cap.total_tokens;
        dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
        idxd->nr_tokens = idxd->max_tokens;

        /* read engine capabilities */
        idxd->hw.engine_cap.bits =
                ioread64(idxd->reg_base + IDXD_ENGCAP_OFFSET);
        dev_dbg(dev, "engine_cap: %#llx\n", idxd->hw.engine_cap.bits);
        idxd->max_engines = idxd->hw.engine_cap.num_engines;
        dev_dbg(dev, "max engines: %u\n", idxd->max_engines);

        /* read workqueue capabilities */
        idxd->hw.wq_cap.bits = ioread64(idxd->reg_base + IDXD_WQCAP_OFFSET);
        dev_dbg(dev, "wq_cap: %#llx\n", idxd->hw.wq_cap.bits);
        idxd->max_wq_size = idxd->hw.wq_cap.total_wq_size;
        dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
        idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
        dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
        idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
        dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size);

        /* reading operation capabilities */
        for (i = 0; i < 4; i++) {
                idxd->hw.opcap.bits[i] = ioread64(idxd->reg_base +
                                IDXD_OPCAP_OFFSET + i * sizeof(u64));
                dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
        }
}

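/*
 * Allocate an idxd_device, assign it an id from idxd_ida, and initialize its
 * conf_dev as a child of the PCI device on the dsa bus.
 */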
static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
{
        struct device *dev = &pdev->dev;
        struct idxd_device *idxd;
        int rc;

        idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev));
        if (!idxd)
                return NULL;

        idxd->pdev = pdev;
        idxd->data = data;
        idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
        if (idxd->id < 0)
                return NULL;

        device_initialize(&idxd->conf_dev);
        idxd->conf_dev.parent = dev;
        idxd->conf_dev.bus = &dsa_bus_type;
        idxd->conf_dev.type = idxd->data->dev_type;
        rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
        if (rc < 0) {
                put_device(&idxd->conf_dev);
                return NULL;
        }

        spin_lock_init(&idxd->dev_lock);
        spin_lock_init(&idxd->cmd_lock);

        return idxd;
}

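/*
 * Bind the device for supervisor-mode SVA and save the resulting PASID for
 * kernel-owned work queues to use.
 */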
static int idxd_enable_system_pasid(struct idxd_device *idxd)
{
        int flags;
        unsigned int pasid;
        struct iommu_sva *sva;

        flags = SVM_FLAG_SUPERVISOR_MODE;

        sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
        if (IS_ERR(sva)) {
                dev_warn(&idxd->pdev->dev,
                         "iommu sva bind failed: %ld\n", PTR_ERR(sva));
                return PTR_ERR(sva);
        }

        pasid = iommu_sva_get_pasid(sva);
        if (pasid == IOMMU_PASID_INVALID) {
                iommu_sva_unbind_device(sva);
                return -ENODEV;
        }

        idxd->sva = sva;
        idxd->pasid = pasid;
        dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
        return 0;
}

static void idxd_disable_system_pasid(struct idxd_device *idxd)
{

        iommu_sva_unbind_device(idxd->sva);
        idxd->sva = NULL;
}

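/*
 * Device-level probe: reset the device, optionally enable SVA/PASID, read
 * capabilities and table offsets, set up software state and interrupts, and
 * initialize the perfmon PMU.
 */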
static int idxd_probe(struct idxd_device *idxd)
{
        struct pci_dev *pdev = idxd->pdev;
        struct device *dev = &pdev->dev;
        int rc;

        dev_dbg(dev, "%s entered and resetting device\n", __func__);
        rc = idxd_device_init_reset(idxd);
        if (rc < 0)
                return rc;

        dev_dbg(dev, "IDXD reset complete\n");

        if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
                rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
                if (rc == 0) {
                        rc = idxd_enable_system_pasid(idxd);
                        if (rc < 0) {
                                iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
                                dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
                        } else {
                                set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
                        }
                } else {
                        dev_warn(dev, "Unable to turn on SVA feature.\n");
                }
        } else if (!sva) {
                dev_warn(dev, "User forced SVA off via module param.\n");
        }

        idxd_read_caps(idxd);
        idxd_read_table_offsets(idxd);

        rc = idxd_setup_internals(idxd);
        if (rc)
                goto err;

        /* If the configs are readonly, then load them from device */
        if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
                dev_dbg(dev, "Loading RO device config\n");
                rc = idxd_device_load_config(idxd);
                if (rc < 0)
                        goto err_config;
        }

        rc = idxd_setup_interrupts(idxd);
        if (rc)
                goto err_config;

        dev_dbg(dev, "IDXD interrupt setup complete.\n");

        idxd->major = idxd_cdev_get_major(idxd);

        rc = perfmon_pmu_init(idxd);
        if (rc < 0)
                dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc);

        dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
        return 0;

 err_config:
        idxd_cleanup_internals(idxd);
 err:
        if (device_pasid_enabled(idxd))
                idxd_disable_system_pasid(idxd);
        iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
        return rc;
}

static void idxd_cleanup(struct idxd_device *idxd)
{
        struct device *dev = &idxd->pdev->dev;

        perfmon_pmu_remove(idxd);
        idxd_cleanup_interrupts(idxd);
        idxd_cleanup_internals(idxd);
        if (device_pasid_enabled(idxd))
                idxd_disable_system_pasid(idxd);
        iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
}

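/*
 * PCI probe entry point: enable and map the device, set the DMA masks, run
 * the device-level probe, and register the idxd devices with sysfs.
 */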
static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        struct device *dev = &pdev->dev;
        struct idxd_device *idxd;
        struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data;
        int rc;

        rc = pci_enable_device(pdev);
        if (rc)
                return rc;

        dev_dbg(dev, "Alloc IDXD context\n");
        idxd = idxd_alloc(pdev, data);
        if (!idxd) {
                rc = -ENOMEM;
                goto err_idxd_alloc;
        }

        dev_dbg(dev, "Mapping BARs\n");
        idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0);
        if (!idxd->reg_base) {
                rc = -ENOMEM;
                goto err_iomap;
        }

        dev_dbg(dev, "Set DMA masks\n");
        rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (rc)
                rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
        if (rc)
                goto err;

        rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
        if (rc)
                rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
        if (rc)
                goto err;

        dev_dbg(dev, "Set PCI master\n");
        pci_set_master(pdev);
        pci_set_drvdata(pdev, idxd);

        idxd->hw.version = ioread32(idxd->reg_base + IDXD_VER_OFFSET);
        rc = idxd_probe(idxd);
        if (rc) {
                dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
                goto err;
        }

        rc = idxd_register_devices(idxd);
        if (rc) {
                dev_err(dev, "IDXD sysfs setup failed\n");
                goto err_dev_register;
        }

        idxd->state = IDXD_DEV_CONF_READY;

        dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
                 idxd->hw.version);

        return 0;

 err_dev_register:
        idxd_cleanup(idxd);
 err:
        pci_iounmap(pdev, idxd->reg_base);
 err_iomap:
        put_device(&idxd->conf_dev);
 err_idxd_alloc:
        pci_disable_device(pdev);
        return rc;
}

static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
{
        struct idxd_desc *desc, *itr;
        struct llist_node *head;

        head = llist_del_all(&ie->pending_llist);
        if (!head)
                return;

        llist_for_each_entry_safe(desc, itr, head, llnode) {
                idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
                idxd_free_desc(desc->wq, desc);
        }
}

static void idxd_flush_work_list(struct idxd_irq_entry *ie)
{
        struct idxd_desc *desc, *iter;

        list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
                list_del(&desc->list);
                idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
                idxd_free_desc(desc->wq, desc);
        }
}

void idxd_wqs_quiesce(struct idxd_device *idxd)
{
        struct idxd_wq *wq;
        int i;

        for (i = 0; i < idxd->max_wqs; i++) {
                wq = idxd->wqs[i];
                if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL)
                        idxd_wq_quiesce(wq);
        }
}

static void idxd_release_int_handles(struct idxd_device *idxd)
{
        struct device *dev = &idxd->pdev->dev;
        int i, rc;

        for (i = 0; i < idxd->num_wq_irqs; i++) {
                if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
                        rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
                                                            IDXD_IRQ_MSIX);
                        if (rc < 0)
                                dev_warn(dev, "irq handle %d release failed\n",
                                         idxd->int_handles[i]);
                        else
                                dev_dbg(dev, "int handle released: %u\n", idxd->int_handles[i]);
                }
        }
}

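/*
 * Shutdown path, also used by idxd_remove(): disable the device, quiesce and
 * free all interrupts, abort descriptors still pending completion, and
 * release the PCI resources.
 */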
static void idxd_shutdown(struct pci_dev *pdev)
{
        struct idxd_device *idxd = pci_get_drvdata(pdev);
        int rc, i;
        struct idxd_irq_entry *irq_entry;
        int msixcnt = pci_msix_vec_count(pdev);

        rc = idxd_device_disable(idxd);
        if (rc)
                dev_err(&pdev->dev, "Disabling device failed\n");

        dev_dbg(&pdev->dev, "%s called\n", __func__);
        idxd_mask_msix_vectors(idxd);
        idxd_mask_error_interrupts(idxd);

        for (i = 0; i < msixcnt; i++) {
                irq_entry = &idxd->irq_entries[i];
                synchronize_irq(irq_entry->vector);
                free_irq(irq_entry->vector, irq_entry);
                if (i == 0)
                        continue;
                idxd_flush_pending_llist(irq_entry);
                idxd_flush_work_list(irq_entry);
        }

        idxd_msix_perm_clear(idxd);
        idxd_release_int_handles(idxd);
        pci_free_irq_vectors(pdev);
        pci_iounmap(pdev, idxd->reg_base);
        pci_disable_device(pdev);
        destroy_workqueue(idxd->wq);
}

static void idxd_remove(struct pci_dev *pdev)
{
        struct idxd_device *idxd = pci_get_drvdata(pdev);

        dev_dbg(&pdev->dev, "%s called\n", __func__);
        idxd_shutdown(pdev);
        if (device_pasid_enabled(idxd))
                idxd_disable_system_pasid(idxd);
        idxd_unregister_devices(idxd);
        perfmon_pmu_remove(idxd);
        iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
}

static struct pci_driver idxd_pci_driver = {
        .name           = DRV_NAME,
        .id_table       = idxd_pci_tbl,
        .probe          = idxd_pci_probe,
        .remove         = idxd_remove,
        .shutdown       = idxd_shutdown,
};

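/*
 * Module init: require MOVDIR64B, note ENQCMD(S) availability, then register
 * the dsa bus type, the idxd device driver, the char dev driver, and the PCI
 * driver, unwinding in reverse order on failure.
 */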
static int __init idxd_init_module(void)
{
        int err;

        /*
         * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in
         * enumerating the device. We can not utilize it.
         */
        if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
                pr_warn("idxd driver failed to load without MOVDIR64B.\n");
                return -ENODEV;
        }

        if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
                pr_warn("Platform does not have ENQCMD(S) support.\n");
        else
                support_enqcmd = true;

        perfmon_init();

        err = idxd_register_bus_type();
        if (err < 0)
                return err;

        err = idxd_register_driver();
        if (err < 0)
                goto err_idxd_driver_register;

        err = idxd_cdev_register();
        if (err)
                goto err_cdev_register;

        err = pci_register_driver(&idxd_pci_driver);
        if (err)
                goto err_pci_register;

        return 0;

err_pci_register:
        idxd_cdev_remove();
err_cdev_register:
        idxd_unregister_driver();
err_idxd_driver_register:
        idxd_unregister_bus_type();
        return err;
}
module_init(idxd_init_module);

static void __exit idxd_exit_module(void)
{
        idxd_unregister_driver();
        pci_unregister_driver(&idxd_pci_driver);
        idxd_cdev_remove();
        idxd_unregister_bus_type();
        perfmon_exit();
}
module_exit(idxd_exit_module);