iommu: Separate SVA and IOPF
[linux-2.6-microblaze.git] drivers/iommu/intel/svm.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);

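/*
 * pasid_private_array maps a PASID to the driver-private data (here a
 * struct intel_svm) associated with it. The helpers below wrap insertion,
 * removal and lookup in the XArray.
 */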
static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
        return xa_alloc(&pasid_private_array, &pasid, priv,
                        XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
        xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
        return xa_load(&pasid_private_array, pasid);
}

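/* Find the intel_svm_dev bound to @dev in @svm's device list, if any. */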
static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
        struct intel_svm_dev *sdev = NULL, *t;

        rcu_read_lock();
        list_for_each_entry_rcu(t, &svm->devs, list) {
                if (t->dev == dev) {
                        sdev = t;
                        break;
                }
        }
        rcu_read_unlock();

        return sdev;
}

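/*
 * Allocate the page request queue for @iommu, set up its interrupt and the
 * associated iopf queue, and program the PRQ head/tail/address registers.
 */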
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
        struct iopf_queue *iopfq;
        struct page *pages;
        int irq, ret;

        pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
        if (!pages) {
                pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
                        iommu->name);
                return -ENOMEM;
        }
        iommu->prq = page_address(pages);

        irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu);
        if (irq <= 0) {
                pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
                       iommu->name);
                ret = -EINVAL;
                goto free_prq;
        }
        iommu->pr_irq = irq;

        snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
                 "dmar%d-iopfq", iommu->seq_id);
        iopfq = iopf_queue_alloc(iommu->iopfq_name);
        if (!iopfq) {
                pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
                ret = -ENOMEM;
                goto free_hwirq;
        }
        iommu->iopf_queue = iopfq;

        snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

        ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
                                   iommu->prq_name, iommu);
        if (ret) {
                pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
                       iommu->name);
                goto free_iopfq;
        }
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

        init_completion(&iommu->prq_complete);

        return 0;

free_iopfq:
        iopf_queue_free(iommu->iopf_queue);
        iommu->iopf_queue = NULL;
free_hwirq:
        dmar_free_hwirq(irq);
        iommu->pr_irq = 0;
free_prq:
        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

        if (iommu->pr_irq) {
                free_irq(iommu->pr_irq, iommu);
                dmar_free_hwirq(iommu->pr_irq);
                iommu->pr_irq = 0;
        }

        if (iommu->iopf_queue) {
                iopf_queue_free(iommu->iopf_queue);
                iommu->iopf_queue = NULL;
        }

        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;

        return 0;
}

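/*
 * Mark @iommu as SVM-capable only if its first-level paging capabilities
 * (1GB pages, 5-level paging) match the features enabled on the CPU.
 */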
void intel_svm_check(struct intel_iommu *iommu)
{
        if (!pasid_supported(iommu))
                return;

        if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
            !cap_fl1gp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible 1GB page capability\n",
                       iommu->name);
                return;
        }

        if (cpu_feature_enabled(X86_FEATURE_LA57) &&
            !cap_fl5lp_support(iommu->cap)) {
                pr_err("%s SVM disabled, incompatible paging mode\n",
                       iommu->name);
                return;
        }

        iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
                                  struct intel_svm_dev *sdev,
                                  unsigned long address,
                                  unsigned long pages, int ih)
{
        struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

        if (WARN_ON(!pages))
                return;

        qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
        if (info->ats_enabled) {
                qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
                                         svm->pasid, sdev->qdep, address,
                                         order_base_2(pages));
                quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
                                          svm->pasid, sdev->qdep);
        }
}

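/*
 * PASID-based IOTLB and device-TLB invalidations operate on naturally
 * aligned, power-of-two sized ranges. Round the requested range up to such
 * a granule and flush it in aligned chunks.
 */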
static void intel_flush_svm_range_dev(struct intel_svm *svm,
                                      struct intel_svm_dev *sdev,
                                      unsigned long address,
                                      unsigned long pages, int ih)
{
        unsigned long shift = ilog2(__roundup_pow_of_two(pages));
        unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
        unsigned long start = ALIGN_DOWN(address, align);
        unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

        while (start < end) {
                __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
                start += align;
        }
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
                                unsigned long pages, int ih)
{
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
        rcu_read_unlock();
}

static void intel_flush_svm_all(struct intel_svm *svm)
{
        struct device_domain_info *info;
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list) {
                info = dev_iommu_priv_get(sdev->dev);

                qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, 0, -1UL, 0);
                if (info->ats_enabled) {
                        qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
                                                 svm->pasid, sdev->qdep,
                                                 0, 64 - VTD_PAGE_SHIFT);
                        quirk_extra_dev_tlb_flush(info, 0, 64 - VTD_PAGE_SHIFT,
                                                  svm->pasid, sdev->qdep);
                }
        }
        rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long start, unsigned long end)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        if (start == 0 && end == -1UL) {
                intel_flush_svm_all(svm);
                return;
        }

        intel_flush_svm_range(svm, start,
                              (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
        struct intel_svm_dev *sdev;

        /* This might end up being called from exit_mmap(), *before* the page
         * tables are cleared. And __mmu_notifier_release() will delete us from
         * the list of notifiers so that our arch_invalidate_secondary_tlbs()
         * callback doesn't get called when the page tables are cleared. So we
         * need to protect against hardware accessing those page tables.
         *
         * We do it by clearing the entry in the PASID table and then flushing
         * the IOTLB and the PASID table caches. This might upset hardware;
         * perhaps we'll want to point the PASID to a dummy PGD (like the zero
         * page) so that we end up taking a fault that the hardware really
         * *has* to handle gracefully without affecting other processes.
         */
        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
                                            svm->pasid, true);
        rcu_read_unlock();
}

static const struct mmu_notifier_ops intel_mmuops = {
        .release = intel_mm_release,
        .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs,
};

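/*
 * Resolve @pasid to its intel_svm and, if @dev is bound to it, the matching
 * intel_svm_dev. Returns 0 with *rsvm and/or *rsdev set to NULL when no
 * binding exists, or a negative errno on error.
 */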
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
                             struct intel_svm **rsvm,
                             struct intel_svm_dev **rsdev)
{
        struct intel_svm_dev *sdev = NULL;
        struct intel_svm *svm;

        if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX)
                return -EINVAL;

        svm = pasid_private_find(pasid);
        if (IS_ERR(svm))
                return PTR_ERR(svm);

        if (!svm)
                goto out;

        /*
         * If we found an svm for the PASID, there must be at least one device
         * bond.
         */
        if (WARN_ON(list_empty(&svm->devs)))
                return -EINVAL;
        sdev = svm_lookup_device_by_dev(svm, dev);

out:
        *rsvm = svm;
        *rsdev = sdev;

        return 0;
}

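/*
 * Bind @domain->mm to @dev under @pasid: look up or create the intel_svm
 * for the PASID (registering the MMU notifier on first use) and install a
 * first-level PASID table entry that points at the mm's page tables.
 */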
static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev,
                             struct iommu_domain *domain, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct mm_struct *mm = domain->mm;
        struct intel_svm_dev *sdev;
        struct intel_svm *svm;
        unsigned long sflags;
        int ret = 0;

        svm = pasid_private_find(pasid);
        if (!svm) {
                svm = kzalloc(sizeof(*svm), GFP_KERNEL);
                if (!svm)
                        return -ENOMEM;

                svm->pasid = pasid;
                svm->mm = mm;
                INIT_LIST_HEAD_RCU(&svm->devs);

                svm->notifier.ops = &intel_mmuops;
                ret = mmu_notifier_register(&svm->notifier, mm);
                if (ret) {
                        kfree(svm);
                        return ret;
                }

                ret = pasid_private_add(svm->pasid, svm);
                if (ret) {
                        mmu_notifier_unregister(&svm->notifier, mm);
                        kfree(svm);
                        return ret;
                }
        }

        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev) {
                ret = -ENOMEM;
                goto free_svm;
        }

        sdev->dev = dev;
        sdev->iommu = iommu;
        sdev->did = FLPT_DEFAULT_DID;
        sdev->sid = PCI_DEVID(info->bus, info->devfn);
        init_rcu_head(&sdev->rcu);
        if (info->ats_enabled) {
                sdev->qdep = info->ats_qdep;
                if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
                        sdev->qdep = 0;
        }

        /* Setup the pasid table: */
        sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
        ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, pasid,
                                            FLPT_DEFAULT_DID, sflags);
        if (ret)
                goto free_sdev;

        list_add_rcu(&sdev->list, &svm->devs);

        return 0;

free_sdev:
        kfree(sdev);
free_svm:
        if (list_empty(&svm->devs)) {
                mmu_notifier_unregister(&svm->notifier, mm);
                pasid_private_remove(pasid);
                kfree(svm);
        }

        return ret;
}

void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
{
        struct intel_svm_dev *sdev;
        struct intel_svm *svm;
        struct mm_struct *mm;

        if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev))
                return;
        mm = svm->mm;

        if (sdev) {
                list_del_rcu(&sdev->list);
                kfree_rcu(sdev, rcu);

                if (list_empty(&svm->devs)) {
                        if (svm->notifier.ops)
                                mmu_notifier_unregister(&svm->notifier, mm);
                        pasid_private_remove(svm->pasid);
                        /*
                         * We mandate that no page faults may be outstanding
                         * for the PASID when intel_svm_remove_dev_pasid() is
                         * called. If that is not obeyed, subtle errors will
                         * happen. Let's make them less subtle...
                         */
                        memset(svm, 0x6b, sizeof(*svm));
                        kfree(svm);
                }
        }
}

/* Page request queue descriptor */
struct page_req_dsc {
        union {
                struct {
                        u64 type:8;
                        u64 pasid_present:1;
                        u64 priv_data_present:1;
                        u64 rsvd:6;
                        u64 rid:16;
                        u64 pasid:20;
                        u64 exe_req:1;
                        u64 pm_req:1;
                        u64 rsvd2:10;
                };
                u64 qw_0;
        };
        union {
                struct {
                        u64 rd_req:1;
                        u64 wr_req:1;
                        u64 lpig:1;
                        u64 prg_index:9;
                        u64 addr:52;
                };
                u64 qw_1;
        };
        u64 priv_data[2];
};

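/*
 * Page requests must carry a canonical virtual address; anything else is
 * treated as a malformed request and answered with an invalid response.
 */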
static bool is_canonical_address(u64 addr)
{
        int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
        long saddr = (long) addr;

        return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_drain_pasid_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then it follows the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
void intel_drain_pasid_prq(struct device *dev, u32 pasid)
{
        struct device_domain_info *info;
        struct dmar_domain *domain;
        struct intel_iommu *iommu;
        struct qi_desc desc[3];
        struct pci_dev *pdev;
        int head, tail;
        u16 sid, did;
        int qdep;

        info = dev_iommu_priv_get(dev);
        if (WARN_ON(!info || !dev_is_pci(dev)))
                return;

        if (!info->pri_enabled)
                return;

        iommu = info->iommu;
        domain = info->domain;
        pdev = to_pci_dev(dev);
        sid = PCI_DEVID(info->bus, info->devfn);
        did = domain_id_iommu(domain, iommu);
        qdep = pci_ats_queue_depth(pdev);

        /*
         * Check and wait until all pending page requests in the queue are
         * handled by the prq handling thread.
         */
prq_retry:
        reinit_completion(&iommu->prq_complete);
        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        while (head != tail) {
                struct page_req_dsc *req;

                req = &iommu->prq[head / sizeof(*req)];
                if (!req->pasid_present || req->pasid != pasid) {
                        head = (head + sizeof(*req)) & PRQ_RING_MASK;
                        continue;
                }

                wait_for_completion(&iommu->prq_complete);
                goto prq_retry;
        }

        iopf_queue_flush_dev(dev);

        /*
         * Perform steps described in VT-d spec CH7.10 to drain page
         * requests and responses in hardware.
         */
        memset(desc, 0, sizeof(desc));
        desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
                        QI_IWD_FENCE |
                        QI_IWD_TYPE;
        desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
                        QI_EIOTLB_DID(did) |
                        QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
                        QI_EIOTLB_TYPE;
        desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
                        QI_DEV_EIOTLB_SID(sid) |
                        QI_DEV_EIOTLB_QDEP(qdep) |
                        QI_DEIOTLB_TYPE |
                        QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
        reinit_completion(&iommu->prq_complete);
        qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                wait_for_completion(&iommu->prq_complete);
                goto qi_retry;
        }
}

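/* Translate the request permission bits into IOMMU_FAULT_PERM_* flags. */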
static int prq_to_iommu_prot(struct page_req_dsc *req)
{
        int prot = 0;

        if (req->rd_req)
                prot |= IOMMU_FAULT_PERM_READ;
        if (req->wr_req)
                prot |= IOMMU_FAULT_PERM_WRITE;
        if (req->exe_req)
                prot |= IOMMU_FAULT_PERM_EXEC;
        if (req->pm_req)
                prot |= IOMMU_FAULT_PERM_PRIV;

        return prot;
}

static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
                                struct page_req_dsc *desc)
{
        struct iopf_fault event = { };

        if (!dev || !dev_is_pci(dev))
                return -ENODEV;

        /* Fill in event data for device specific processing */
        event.fault.type = IOMMU_FAULT_PAGE_REQ;
        event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
        event.fault.prm.pasid = desc->pasid;
        event.fault.prm.grpid = desc->prg_index;
        event.fault.prm.perm = prq_to_iommu_prot(desc);

        if (desc->lpig)
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
        if (desc->pasid_present) {
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
        }
        if (desc->priv_data_present) {
                /*
                 * If private data is present, a page group response is
                 * required just as it is for LPIG.
                 * iommu_report_device_fault() doesn't understand this
                 * vendor-specific requirement, so set the last page in
                 * group bit as a workaround.
                 */
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
                event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
                event.fault.prm.private_data[0] = desc->priv_data[0];
                event.fault.prm.private_data[1] = desc->priv_data[1];
        } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
                /*
                 * If the private data fields are not used by hardware, use
                 * them to monitor the PRQ handling latency.
                 */
                event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
        }

        return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
                                 struct page_req_dsc *req, int result)
{
        struct qi_desc desc;

        pr_err("%s: Invalid page request: %08llx %08llx\n",
               iommu->name, ((unsigned long long *)req)[0],
               ((unsigned long long *)req)[1]);

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must
         * respond with page group response if private data
         * is present (PDP) or last page in group (LPIG) bit
         * is set. This is an additional VT-d requirement
         * beyond the PCI ATS spec.
         */
        if (!req->lpig && !req->priv_data_present)
                return;

        desc.qw0 = QI_PGRP_PASID(req->pasid) |
                        QI_PGRP_DID(req->rid) |
                        QI_PGRP_PASID_P(req->pasid_present) |
                        QI_PGRP_PDP(req->priv_data_present) |
                        QI_PGRP_RESP_CODE(result) |
                        QI_PGRP_RESP_TYPE;
        desc.qw1 = QI_PGRP_IDX(req->prg_index) |
                        QI_PGRP_LPIG(req->lpig);

        if (req->priv_data_present) {
                desc.qw2 = req->priv_data[0];
                desc.qw3 = req->priv_data[1];
        } else {
                desc.qw2 = 0;
                desc.qw3 = 0;
        }

        qi_submit_sync(iommu, &desc, 1, 0);
}

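/*
 * Threaded handler for the page request queue interrupt: walk the queue,
 * report each valid request through the IOMMU fault framework, respond to
 * malformed ones, and recover from queue overflow.
 */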
static irqreturn_t prq_event_thread(int irq, void *d)
{
        struct intel_iommu *iommu = d;
        struct page_req_dsc *req;
        int head, tail, handled;
        struct pci_dev *pdev;
        u64 address;

        /*
         * Clear PPR bit before reading head/tail registers, to ensure that
         * we get a new interrupt if needed.
         */
        writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

        tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
        head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
        handled = (head != tail);
        while (head != tail) {
                req = &iommu->prq[head / sizeof(*req)];
                address = (u64)req->addr << VTD_PAGE_SHIFT;

                if (unlikely(!req->pasid_present)) {
                        pr_err("IOMMU: %s: Page request without PASID\n",
                               iommu->name);
bad_req:
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                        goto prq_advance;
                }

                if (unlikely(!is_canonical_address(address))) {
                        pr_err("IOMMU: %s: Address is not canonical\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
                        pr_err("IOMMU: %s: Page request in Privilege Mode\n",
                               iommu->name);
                        goto bad_req;
                }

                if (unlikely(req->exe_req && req->rd_req)) {
                        pr_err("IOMMU: %s: Execution request not supported\n",
                               iommu->name);
                        goto bad_req;
                }

                /* Drop Stop Marker message. No need for a response. */
                if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
                        goto prq_advance;

                pdev = pci_get_domain_bus_and_slot(iommu->segment,
                                                   PCI_BUS_NUM(req->rid),
                                                   req->rid & 0xff);
                /*
                 * If the page request is to be handled outside the IOMMU
                 * driver by a receiver of the fault notification, the page
                 * response is skipped here.
                 */
                if (!pdev)
                        goto bad_req;

                if (intel_svm_prq_report(iommu, &pdev->dev, req))
                        handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
                else
                        trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
                                         req->priv_data[0], req->priv_data[1],
                                         iommu->prq_seq_number++);
                pci_dev_put(pdev);
prq_advance:
                head = (head + sizeof(*req)) & PRQ_RING_MASK;
        }

        dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

        /*
         * Clear the page request overflow bit and wake up all threads that
         * are waiting for the completion of this handling.
         */
        if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
                pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
                                    iommu->name);
                head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
                tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
                if (head == tail) {
                        iopf_queue_discard_partial(iommu->iopf_queue);
                        writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
                        pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
                                            iommu->name);
                }
        }

        if (!completion_done(&iommu->prq_complete))
                complete(&iommu->prq_complete);

        return IRQ_RETVAL(handled);
}

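/*
 * Send a page group response for a previously reported page request. A
 * response is only required when the request had the LPIG bit set or
 * carried private data.
 */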
int intel_svm_page_response(struct device *dev,
                            struct iopf_fault *evt,
                            struct iommu_page_response *msg)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;
        u8 bus = info->bus, devfn = info->devfn;
        struct iommu_fault_page_request *prm;
        bool private_present;
        bool pasid_present;
        bool last_page;
        int ret = 0;
        u16 sid;

        prm = &evt->fault.prm;
        sid = PCI_DEVID(bus, devfn);
        pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
        private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
        last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

        if (!pasid_present) {
                ret = -EINVAL;
                goto out;
        }

        if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
                ret = -EINVAL;
                goto out;
        }

        /*
         * Per VT-d spec. v3.0 ch7.7, system software must respond
         * with page group response if private data is present (PDP)
         * or last page in group (LPIG) bit is set. This is an
         * additional VT-d requirement beyond PCI ATS spec.
         */
        if (last_page || private_present) {
                struct qi_desc desc;

                desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
                                QI_PGRP_PASID_P(pasid_present) |
                                QI_PGRP_PDP(private_present) |
                                QI_PGRP_RESP_CODE(msg->code) |
                                QI_PGRP_RESP_TYPE;
                desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
                desc.qw2 = 0;
                desc.qw3 = 0;

                if (private_present) {
                        desc.qw2 = prm->private_data[0];
                        desc.qw3 = prm->private_data[1];
                } else if (prm->private_data[0]) {
                        dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
                                ktime_to_ns(ktime_get()) - prm->private_data[0]);
                }

                qi_submit_sync(iommu, &desc, 1, 0);
        }
out:
        return ret;
}

static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
                                   struct device *dev, ioasid_t pasid)
{
        struct device_domain_info *info = dev_iommu_priv_get(dev);
        struct intel_iommu *iommu = info->iommu;

        return intel_svm_bind_mm(iommu, dev, domain, pasid);
}

static void intel_svm_domain_free(struct iommu_domain *domain)
{
        kfree(to_dmar_domain(domain));
}

static const struct iommu_domain_ops intel_svm_domain_ops = {
        .set_dev_pasid          = intel_svm_set_dev_pasid,
        .free                   = intel_svm_domain_free
};

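/*
 * Allocate an SVA domain. Attaching it to a device with a PASID (via
 * set_dev_pasid) binds that device/PASID to the domain's mm.
 */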
struct iommu_domain *intel_svm_domain_alloc(void)
{
        struct dmar_domain *domain;

        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;
        domain->domain.ops = &intel_svm_domain_ops;

        return &domain->domain;
}