drivers/iommu/intel/dmar.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2006, Intel Corporation.
4  *
5  * Copyright (C) 2006-2008 Intel Corporation
6  * Author: Ashok Raj <ashok.raj@intel.com>
7  * Author: Shaohua Li <shaohua.li@intel.com>
8  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
9  *
10  * This file implements early detection/parsing of Remapping Devices
11  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
12  * tables.
13  *
14  * These routines are used by both DMA-remapping and Interrupt-remapping
15  */
16
17 #define pr_fmt(fmt)     "DMAR: " fmt
18
19 #include <linux/pci.h>
20 #include <linux/dmar.h>
21 #include <linux/iova.h>
22 #include <linux/intel-iommu.h>
23 #include <linux/timer.h>
24 #include <linux/irq.h>
25 #include <linux/interrupt.h>
26 #include <linux/tboot.h>
27 #include <linux/dmi.h>
28 #include <linux/slab.h>
29 #include <linux/iommu.h>
30 #include <linux/numa.h>
31 #include <linux/limits.h>
32 #include <asm/irq_remapping.h>
33 #include <trace/events/intel_iommu.h>
34
35 #include "../irq_remapping.h"
36 #include "perf.h"
37
38 typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
39 struct dmar_res_callback {
40         dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
41         void                    *arg[ACPI_DMAR_TYPE_RESERVED];
42         bool                    ignore_unhandled;
43         bool                    print_entry;
44 };
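/*
 * Illustrative use of the callback table above: a sketch mirroring what
 * parse_dmar_table() and detect_intel_iommu() set up later in this file.
 * "dmar" here is a placeholder for an already-mapped struct acpi_table_dmar,
 * not a variable defined at this point.
 *
 *	struct dmar_res_callback cb = {
 *		.ignore_unhandled = true,
 *		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
 *	};
 *	dmar_walk_dmar_table(dmar, &cb);
 */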
45
46 /*
47  * Assumptions:
48  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
49  *    before IO devices managed by that unit.
50  * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
51  *    after IO devices managed by that unit.
52  * 3) Hotplug events are rare.
53  *
54  * Locking rules for DMA and interrupt remapping related global data structures:
55  * 1) Use dmar_global_lock in process context
56  * 2) Use RCU in interrupt context
57  */
58 DECLARE_RWSEM(dmar_global_lock);
59 LIST_HEAD(dmar_drhd_units);
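/*
 * Illustrative sketch of the locking rules above (not part of the upstream
 * file; "new_drhd" and "dmaru" are placeholder names). Writers in process
 * context take dmar_global_lock, while readers in interrupt context rely
 * on RCU, e.g.:
 *
 *	down_write(&dmar_global_lock);
 *	list_add_rcu(&new_drhd->list, &dmar_drhd_units);
 *	up_write(&dmar_global_lock);
 *
 *	rcu_read_lock();
 *	for_each_drhd_unit(dmaru)
 *		... lookup only, no sleeping ...
 *	rcu_read_unlock();
 */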
60
61 struct acpi_table_header * __initdata dmar_tbl;
62 static int dmar_dev_scope_status = 1;
63 static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
64
65 static int alloc_iommu(struct dmar_drhd_unit *drhd);
66 static void free_iommu(struct intel_iommu *iommu);
67
68 static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
69 {
70         /*
71          * add INCLUDE_ALL at the tail, so a scan of the list will find it at
72          * the very end.
73          */
74         if (drhd->include_all)
75                 list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
76         else
77                 list_add_rcu(&drhd->list, &dmar_drhd_units);
78 }
79
80 void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
81 {
82         struct acpi_dmar_device_scope *scope;
83
84         *cnt = 0;
85         while (start < end) {
86                 scope = start;
87                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
88                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
89                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
90                         (*cnt)++;
91                 else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
92                         scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
93                         pr_warn("Unsupported device scope\n");
94                 }
95                 start += scope->length;
96         }
97         if (*cnt == 0)
98                 return NULL;
99
100         return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
101 }
102
103 void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
104 {
105         int i;
106         struct device *tmp_dev;
107
108         if (*devices && *cnt) {
109                 for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
110                         put_device(tmp_dev);
111                 kfree(*devices);
112         }
113
114         *devices = NULL;
115         *cnt = 0;
116 }
117
118 /* Optimize out kzalloc()/kfree() for normal cases */
119 static char dmar_pci_notify_info_buf[64];
120
121 static struct dmar_pci_notify_info *
122 dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
123 {
124         int level = 0;
125         size_t size;
126         struct pci_dev *tmp;
127         struct dmar_pci_notify_info *info;
128
129         BUG_ON(dev->is_virtfn);
130
131         /*
132          * Ignore devices that have a domain number higher than what can
133          * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
134          */
135         if (pci_domain_nr(dev->bus) > U16_MAX)
136                 return NULL;
137
138         /* Only generate path[] for device addition event */
139         if (event == BUS_NOTIFY_ADD_DEVICE)
140                 for (tmp = dev; tmp; tmp = tmp->bus->self)
141                         level++;
142
143         size = struct_size(info, path, level);
144         if (size <= sizeof(dmar_pci_notify_info_buf)) {
145                 info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
146         } else {
147                 info = kzalloc(size, GFP_KERNEL);
148                 if (!info) {
149                         if (dmar_dev_scope_status == 0)
150                                 dmar_dev_scope_status = -ENOMEM;
151                         return NULL;
152                 }
153         }
154
155         info->event = event;
156         info->dev = dev;
157         info->seg = pci_domain_nr(dev->bus);
158         info->level = level;
159         if (event == BUS_NOTIFY_ADD_DEVICE) {
160                 for (tmp = dev; tmp; tmp = tmp->bus->self) {
161                         level--;
162                         info->path[level].bus = tmp->bus->number;
163                         info->path[level].device = PCI_SLOT(tmp->devfn);
164                         info->path[level].function = PCI_FUNC(tmp->devfn);
165                         if (pci_is_root_bus(tmp->bus))
166                                 info->bus = tmp->bus->number;
167                 }
168         }
169
170         return info;
171 }
172
173 static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
174 {
175         if ((void *)info != dmar_pci_notify_info_buf)
176                 kfree(info);
177 }
178
179 static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
180                                 struct acpi_dmar_pci_path *path, int count)
181 {
182         int i;
183
184         if (info->bus != bus)
185                 goto fallback;
186         if (info->level != count)
187                 goto fallback;
188
189         for (i = 0; i < count; i++) {
190                 if (path[i].device != info->path[i].device ||
191                     path[i].function != info->path[i].function)
192                         goto fallback;
193         }
194
195         return true;
196
197 fallback:
198
199         if (count != 1)
200                 return false;
201
202         i = info->level - 1;
203         if (bus              == info->path[i].bus &&
204             path[0].device   == info->path[i].device &&
205             path[0].function == info->path[i].function) {
206                 pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
207                         bus, path[0].device, path[0].function);
208                 return true;
209         }
210
211         return false;
212 }
213
214 /* Return: > 0 if match found, 0 if no match found, < 0 if an error happens */
215 int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
216                           void *start, void *end, u16 segment,
217                           struct dmar_dev_scope *devices,
218                           int devices_cnt)
219 {
220         int i, level;
221         struct device *tmp, *dev = &info->dev->dev;
222         struct acpi_dmar_device_scope *scope;
223         struct acpi_dmar_pci_path *path;
224
225         if (segment != info->seg)
226                 return 0;
227
228         for (; start < end; start += scope->length) {
229                 scope = start;
230                 if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
231                     scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
232                         continue;
233
234                 path = (struct acpi_dmar_pci_path *)(scope + 1);
235                 level = (scope->length - sizeof(*scope)) / sizeof(*path);
236                 if (!dmar_match_pci_path(info, scope->bus, path, level))
237                         continue;
238
239                 /*
240                  * We expect devices with endpoint scope to have normal PCI
241                  * headers, and devices with bridge scope to have bridge PCI
242                  * headers.  However PCI NTB devices may be listed in the
243                  * DMAR table with bridge scope, even though they have a
244                  * normal PCI header.  NTB devices are identified by class
245  * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
246                  * for this special case.
247                  */
248                 if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
249                      info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
250                     (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
251                      (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
252                       info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
253                         pr_warn("Device scope type does not match for %s\n",
254                                 pci_name(info->dev));
255                         return -EINVAL;
256                 }
257
258                 for_each_dev_scope(devices, devices_cnt, i, tmp)
259                         if (tmp == NULL) {
260                                 devices[i].bus = info->dev->bus->number;
261                                 devices[i].devfn = info->dev->devfn;
262                                 rcu_assign_pointer(devices[i].dev,
263                                                    get_device(dev));
264                                 return 1;
265                         }
266                 BUG_ON(i >= devices_cnt);
267         }
268
269         return 0;
270 }
271
272 int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
273                           struct dmar_dev_scope *devices, int count)
274 {
275         int index;
276         struct device *tmp;
277
278         if (info->seg != segment)
279                 return 0;
280
281         for_each_active_dev_scope(devices, count, index, tmp)
282                 if (tmp == &info->dev->dev) {
283                         RCU_INIT_POINTER(devices[index].dev, NULL);
284                         synchronize_rcu();
285                         put_device(tmp);
286                         return 1;
287                 }
288
289         return 0;
290 }
291
292 static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
293 {
294         int ret = 0;
295         struct dmar_drhd_unit *dmaru;
296         struct acpi_dmar_hardware_unit *drhd;
297
298         for_each_drhd_unit(dmaru) {
299                 if (dmaru->include_all)
300                         continue;
301
302                 drhd = container_of(dmaru->hdr,
303                                     struct acpi_dmar_hardware_unit, header);
304                 ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
305                                 ((void *)drhd) + drhd->header.length,
306                                 dmaru->segment,
307                                 dmaru->devices, dmaru->devices_cnt);
308                 if (ret)
309                         break;
310         }
311         if (ret >= 0)
312                 ret = dmar_iommu_notify_scope_dev(info);
313         if (ret < 0 && dmar_dev_scope_status == 0)
314                 dmar_dev_scope_status = ret;
315
316         if (ret >= 0)
317                 intel_irq_remap_add_device(info);
318
319         return ret;
320 }
321
322 static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
323 {
324         struct dmar_drhd_unit *dmaru;
325
326         for_each_drhd_unit(dmaru)
327                 if (dmar_remove_dev_scope(info, dmaru->segment,
328                         dmaru->devices, dmaru->devices_cnt))
329                         break;
330         dmar_iommu_notify_scope_dev(info);
331 }
332
333 static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
334 {
335         struct pci_dev *physfn = pci_physfn(pdev);
336
337         dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
338 }
339
340 static int dmar_pci_bus_notifier(struct notifier_block *nb,
341                                  unsigned long action, void *data)
342 {
343         struct pci_dev *pdev = to_pci_dev(data);
344         struct dmar_pci_notify_info *info;
345
346         /* Only care about add/remove events for physical functions.
347          * For VFs we actually do the lookup based on the corresponding
348          * PF in device_to_iommu() anyway. */
349         if (pdev->is_virtfn) {
350                 /*
351                  * Ensure that the VF device inherits the irq domain of the
352                  * PF device. Ideally the device would inherit the domain
353                  * from the bus, but DMAR can have multiple units per bus
354                  * which makes this impossible. The VF 'bus' could inherit
355                  * from the PF device, but that's yet another x86'sism to
356                  * inflict on everybody else.
357                  */
358                 if (action == BUS_NOTIFY_ADD_DEVICE)
359                         vf_inherit_msi_domain(pdev);
360                 return NOTIFY_DONE;
361         }
362
363         if (action != BUS_NOTIFY_ADD_DEVICE &&
364             action != BUS_NOTIFY_REMOVED_DEVICE)
365                 return NOTIFY_DONE;
366
367         info = dmar_alloc_pci_notify_info(pdev, action);
368         if (!info)
369                 return NOTIFY_DONE;
370
371         down_write(&dmar_global_lock);
372         if (action == BUS_NOTIFY_ADD_DEVICE)
373                 dmar_pci_bus_add_dev(info);
374         else if (action == BUS_NOTIFY_REMOVED_DEVICE)
375                 dmar_pci_bus_del_dev(info);
376         up_write(&dmar_global_lock);
377
378         dmar_free_pci_notify_info(info);
379
380         return NOTIFY_OK;
381 }
382
383 static struct notifier_block dmar_pci_bus_nb = {
384         .notifier_call = dmar_pci_bus_notifier,
385         .priority = INT_MIN,
386 };
387
388 static struct dmar_drhd_unit *
389 dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
390 {
391         struct dmar_drhd_unit *dmaru;
392
393         list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
394                                 dmar_rcu_check())
395                 if (dmaru->segment == drhd->segment &&
396                     dmaru->reg_base_addr == drhd->address)
397                         return dmaru;
398
399         return NULL;
400 }
401
402 /*
403  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
404  * structure which uniquely represents one DMA remapping hardware unit
405  * present in the platform
406  */
407 static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
408 {
409         struct acpi_dmar_hardware_unit *drhd;
410         struct dmar_drhd_unit *dmaru;
411         int ret;
412
413         drhd = (struct acpi_dmar_hardware_unit *)header;
414         dmaru = dmar_find_dmaru(drhd);
415         if (dmaru)
416                 goto out;
417
418         dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
419         if (!dmaru)
420                 return -ENOMEM;
421
422         /*
423          * If header is allocated from slab by ACPI _DSM method, we need to
424          * copy the content because the memory buffer will be freed on return.
425          */
426         dmaru->hdr = (void *)(dmaru + 1);
427         memcpy(dmaru->hdr, header, header->length);
428         dmaru->reg_base_addr = drhd->address;
429         dmaru->segment = drhd->segment;
430         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
431         dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
432                                               ((void *)drhd) + drhd->header.length,
433                                               &dmaru->devices_cnt);
434         if (dmaru->devices_cnt && dmaru->devices == NULL) {
435                 kfree(dmaru);
436                 return -ENOMEM;
437         }
438
439         ret = alloc_iommu(dmaru);
440         if (ret) {
441                 dmar_free_dev_scope(&dmaru->devices,
442                                     &dmaru->devices_cnt);
443                 kfree(dmaru);
444                 return ret;
445         }
446         dmar_register_drhd_unit(dmaru);
447
448 out:
449         if (arg)
450                 (*(int *)arg)++;
451
452         return 0;
453 }
454
455 static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
456 {
457         if (dmaru->devices && dmaru->devices_cnt)
458                 dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
459         if (dmaru->iommu)
460                 free_iommu(dmaru->iommu);
461         kfree(dmaru);
462 }
463
464 static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
465                                       void *arg)
466 {
467         struct acpi_dmar_andd *andd = (void *)header;
468
469         /* Check for NUL termination within the designated length */
470         if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
471                 pr_warn(FW_BUG
472                            "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
473                            "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
474                            dmi_get_system_info(DMI_BIOS_VENDOR),
475                            dmi_get_system_info(DMI_BIOS_VERSION),
476                            dmi_get_system_info(DMI_PRODUCT_VERSION));
477                 add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
478                 return -EINVAL;
479         }
480         pr_info("ANDD device: %x name: %s\n", andd->device_number,
481                 andd->device_name);
482
483         return 0;
484 }
485
486 #ifdef CONFIG_ACPI_NUMA
487 static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
488 {
489         struct acpi_dmar_rhsa *rhsa;
490         struct dmar_drhd_unit *drhd;
491
492         rhsa = (struct acpi_dmar_rhsa *)header;
493         for_each_drhd_unit(drhd) {
494                 if (drhd->reg_base_addr == rhsa->base_address) {
495                         int node = pxm_to_node(rhsa->proximity_domain);
496
497                         if (!node_online(node))
498                                 node = NUMA_NO_NODE;
499                         drhd->iommu->node = node;
500                         return 0;
501                 }
502         }
503         pr_warn(FW_BUG
504                 "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
505                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
506                 rhsa->base_address,
507                 dmi_get_system_info(DMI_BIOS_VENDOR),
508                 dmi_get_system_info(DMI_BIOS_VERSION),
509                 dmi_get_system_info(DMI_PRODUCT_VERSION));
510         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
511
512         return 0;
513 }
514 #else
515 #define dmar_parse_one_rhsa             dmar_res_noop
516 #endif
517
518 static void
519 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
520 {
521         struct acpi_dmar_hardware_unit *drhd;
522         struct acpi_dmar_reserved_memory *rmrr;
523         struct acpi_dmar_atsr *atsr;
524         struct acpi_dmar_rhsa *rhsa;
525         struct acpi_dmar_satc *satc;
526
527         switch (header->type) {
528         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
529                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
530                                     header);
531                 pr_info("DRHD base: %#016Lx flags: %#x\n",
532                         (unsigned long long)drhd->address, drhd->flags);
533                 break;
534         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
535                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
536                                     header);
537                 pr_info("RMRR base: %#016Lx end: %#016Lx\n",
538                         (unsigned long long)rmrr->base_address,
539                         (unsigned long long)rmrr->end_address);
540                 break;
541         case ACPI_DMAR_TYPE_ROOT_ATS:
542                 atsr = container_of(header, struct acpi_dmar_atsr, header);
543                 pr_info("ATSR flags: %#x\n", atsr->flags);
544                 break;
545         case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
546                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
547                 pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
548                        (unsigned long long)rhsa->base_address,
549                        rhsa->proximity_domain);
550                 break;
551         case ACPI_DMAR_TYPE_NAMESPACE:
552                 /* We don't print this here because we need to sanity-check
553                    it first. So print it in dmar_parse_one_andd() instead. */
554                 break;
555         case ACPI_DMAR_TYPE_SATC:
556                 satc = container_of(header, struct acpi_dmar_satc, header);
557                 pr_info("SATC flags: 0x%x\n", satc->flags);
558                 break;
559         }
560 }
561
562 /**
563  * dmar_table_detect - checks to see if the platform supports DMAR devices
564  */
565 static int __init dmar_table_detect(void)
566 {
567         acpi_status status = AE_OK;
568
569         /* if we can find the DMAR table, then there are DMAR devices */
570         status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
571
572         if (ACPI_SUCCESS(status) && !dmar_tbl) {
573                 pr_warn("Unable to map DMAR\n");
574                 status = AE_NOT_FOUND;
575         }
576
577         return ACPI_SUCCESS(status) ? 0 : -ENOENT;
578 }
579
580 static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
581                                        size_t len, struct dmar_res_callback *cb)
582 {
583         struct acpi_dmar_header *iter, *next;
584         struct acpi_dmar_header *end = ((void *)start) + len;
585
586         for (iter = start; iter < end; iter = next) {
587                 next = (void *)iter + iter->length;
588                 if (iter->length == 0) {
589                         /* Avoid looping forever on bad ACPI tables */
590                         pr_debug(FW_BUG "Invalid 0-length structure\n");
591                         break;
592                 } else if (next > end) {
593                         /* Avoid running past the table end */
594                         pr_warn(FW_BUG "Record passes table end\n");
595                         return -EINVAL;
596                 }
597
598                 if (cb->print_entry)
599                         dmar_table_print_dmar_entry(iter);
600
601                 if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
602                         /* continue for forward compatibility */
603                         pr_debug("Unknown DMAR structure type %d\n",
604                                  iter->type);
605                 } else if (cb->cb[iter->type]) {
606                         int ret;
607
608                         ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
609                         if (ret)
610                                 return ret;
611                 } else if (!cb->ignore_unhandled) {
612                         pr_warn("No handler for DMAR structure type %d\n",
613                                 iter->type);
614                         return -EINVAL;
615                 }
616         }
617
618         return 0;
619 }
620
621 static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
622                                        struct dmar_res_callback *cb)
623 {
624         return dmar_walk_remapping_entries((void *)(dmar + 1),
625                         dmar->header.length - sizeof(*dmar), cb);
626 }
627
628 /**
629  * parse_dmar_table - parses the DMA reporting table
630  */
631 static int __init
632 parse_dmar_table(void)
633 {
634         struct acpi_table_dmar *dmar;
635         int drhd_count = 0;
636         int ret;
637         struct dmar_res_callback cb = {
638                 .print_entry = true,
639                 .ignore_unhandled = true,
640                 .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
641                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
642                 .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
643                 .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
644                 .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
645                 .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
646                 .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
647         };
648
649         /*
650          * Do it again; the earlier dmar_tbl mapping could have been done
651          * with a fixed map.
652          */
653         dmar_table_detect();
654
655         /*
656          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
657          * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected).
658          */
659         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
660
661         dmar = (struct acpi_table_dmar *)dmar_tbl;
662         if (!dmar)
663                 return -ENODEV;
664
665         if (dmar->width < PAGE_SHIFT - 1) {
666                 pr_warn("Invalid DMAR haw\n");
667                 return -EINVAL;
668         }
669
670         pr_info("Host address width %d\n", dmar->width + 1);
671         ret = dmar_walk_dmar_table(dmar, &cb);
672         if (ret == 0 && drhd_count == 0)
673                 pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
674
675         return ret;
676 }
677
678 static int dmar_pci_device_match(struct dmar_dev_scope devices[],
679                                  int cnt, struct pci_dev *dev)
680 {
681         int index;
682         struct device *tmp;
683
684         while (dev) {
685                 for_each_active_dev_scope(devices, cnt, index, tmp)
686                         if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
687                                 return 1;
688
689                 /* Check our parent */
690                 dev = dev->bus->self;
691         }
692
693         return 0;
694 }
695
696 struct dmar_drhd_unit *
697 dmar_find_matched_drhd_unit(struct pci_dev *dev)
698 {
699         struct dmar_drhd_unit *dmaru;
700         struct acpi_dmar_hardware_unit *drhd;
701
702         dev = pci_physfn(dev);
703
704         rcu_read_lock();
705         for_each_drhd_unit(dmaru) {
706                 drhd = container_of(dmaru->hdr,
707                                     struct acpi_dmar_hardware_unit,
708                                     header);
709
710                 if (dmaru->include_all &&
711                     drhd->segment == pci_domain_nr(dev->bus))
712                         goto out;
713
714                 if (dmar_pci_device_match(dmaru->devices,
715                                           dmaru->devices_cnt, dev))
716                         goto out;
717         }
718         dmaru = NULL;
719 out:
720         rcu_read_unlock();
721
722         return dmaru;
723 }
724
725 static void __init dmar_acpi_insert_dev_scope(u8 device_number,
726                                               struct acpi_device *adev)
727 {
728         struct dmar_drhd_unit *dmaru;
729         struct acpi_dmar_hardware_unit *drhd;
730         struct acpi_dmar_device_scope *scope;
731         struct device *tmp;
732         int i;
733         struct acpi_dmar_pci_path *path;
734
735         for_each_drhd_unit(dmaru) {
736                 drhd = container_of(dmaru->hdr,
737                                     struct acpi_dmar_hardware_unit,
738                                     header);
739
740                 for (scope = (void *)(drhd + 1);
741                      (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
742                      scope = ((void *)scope) + scope->length) {
743                         if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
744                                 continue;
745                         if (scope->enumeration_id != device_number)
746                                 continue;
747
748                         path = (void *)(scope + 1);
749                         pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
750                                 dev_name(&adev->dev), dmaru->reg_base_addr,
751                                 scope->bus, path->device, path->function);
752                         for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
753                                 if (tmp == NULL) {
754                                         dmaru->devices[i].bus = scope->bus;
755                                         dmaru->devices[i].devfn = PCI_DEVFN(path->device,
756                                                                             path->function);
757                                         rcu_assign_pointer(dmaru->devices[i].dev,
758                                                            get_device(&adev->dev));
759                                         return;
760                                 }
761                         BUG_ON(i >= dmaru->devices_cnt);
762                 }
763         }
764         pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
765                 device_number, dev_name(&adev->dev));
766 }
767
768 static int __init dmar_acpi_dev_scope_init(void)
769 {
770         struct acpi_dmar_andd *andd;
771
772         if (dmar_tbl == NULL)
773                 return -ENODEV;
774
775         for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
776              ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
777              andd = ((void *)andd) + andd->header.length) {
778                 if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
779                         acpi_handle h;
780                         struct acpi_device *adev;
781
782                         if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
783                                                           andd->device_name,
784                                                           &h))) {
785                                 pr_err("Failed to find handle for ACPI object %s\n",
786                                        andd->device_name);
787                                 continue;
788                         }
789                         adev = acpi_fetch_acpi_dev(h);
790                         if (!adev) {
791                                 pr_err("Failed to get device for ACPI object %s\n",
792                                        andd->device_name);
793                                 continue;
794                         }
795                         dmar_acpi_insert_dev_scope(andd->device_number, adev);
796                 }
797         }
798         return 0;
799 }
800
801 int __init dmar_dev_scope_init(void)
802 {
803         struct pci_dev *dev = NULL;
804         struct dmar_pci_notify_info *info;
805
806         if (dmar_dev_scope_status != 1)
807                 return dmar_dev_scope_status;
808
809         if (list_empty(&dmar_drhd_units)) {
810                 dmar_dev_scope_status = -ENODEV;
811         } else {
812                 dmar_dev_scope_status = 0;
813
814                 dmar_acpi_dev_scope_init();
815
816                 for_each_pci_dev(dev) {
817                         if (dev->is_virtfn)
818                                 continue;
819
820                         info = dmar_alloc_pci_notify_info(dev,
821                                         BUS_NOTIFY_ADD_DEVICE);
822                         if (!info) {
823                                 return dmar_dev_scope_status;
824                         } else {
825                                 dmar_pci_bus_add_dev(info);
826                                 dmar_free_pci_notify_info(info);
827                         }
828                 }
829         }
830
831         return dmar_dev_scope_status;
832 }
833
834 void __init dmar_register_bus_notifier(void)
835 {
836         bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
837 }
838
839
840 int __init dmar_table_init(void)
841 {
842         static int dmar_table_initialized;
843         int ret;
844
845         if (dmar_table_initialized == 0) {
846                 ret = parse_dmar_table();
847                 if (ret < 0) {
848                         if (ret != -ENODEV)
849                                 pr_info("Parse DMAR table failure.\n");
850                 } else  if (list_empty(&dmar_drhd_units)) {
851                         pr_info("No DMAR devices found\n");
852                         ret = -ENODEV;
853                 }
854
855                 if (ret < 0)
856                         dmar_table_initialized = ret;
857                 else
858                         dmar_table_initialized = 1;
859         }
860
861         return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
862 }
863
864 static void warn_invalid_dmar(u64 addr, const char *message)
865 {
866         pr_warn_once(FW_BUG
867                 "Your BIOS is broken; DMAR reported at address %llx%s!\n"
868                 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
869                 addr, message,
870                 dmi_get_system_info(DMI_BIOS_VENDOR),
871                 dmi_get_system_info(DMI_BIOS_VERSION),
872                 dmi_get_system_info(DMI_PRODUCT_VERSION));
873         add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
874 }
875
876 static int __ref
877 dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
878 {
879         struct acpi_dmar_hardware_unit *drhd;
880         void __iomem *addr;
881         u64 cap, ecap;
882
883         drhd = (void *)entry;
884         if (!drhd->address) {
885                 warn_invalid_dmar(0, "");
886                 return -EINVAL;
887         }
888
889         if (arg)
890                 addr = ioremap(drhd->address, VTD_PAGE_SIZE);
891         else
892                 addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
893         if (!addr) {
894                 pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
895                 return -EINVAL;
896         }
897
898         cap = dmar_readq(addr + DMAR_CAP_REG);
899         ecap = dmar_readq(addr + DMAR_ECAP_REG);
900
901         if (arg)
902                 iounmap(addr);
903         else
904                 early_iounmap(addr, VTD_PAGE_SIZE);
905
906         if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
907                 warn_invalid_dmar(drhd->address, " returns all ones");
908                 return -EINVAL;
909         }
910
911         return 0;
912 }
913
914 void __init detect_intel_iommu(void)
915 {
916         int ret;
917         struct dmar_res_callback validate_drhd_cb = {
918                 .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
919                 .ignore_unhandled = true,
920         };
921
922         down_write(&dmar_global_lock);
923         ret = dmar_table_detect();
924         if (!ret)
925                 ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
926                                            &validate_drhd_cb);
927         if (!ret && !no_iommu && !iommu_detected &&
928             (!dmar_disabled || dmar_platform_optin())) {
929                 iommu_detected = 1;
930                 /* Make sure ACS will be enabled */
931                 pci_request_acs();
932         }
933
934 #ifdef CONFIG_X86
935         if (!ret) {
936                 x86_init.iommu.iommu_init = intel_iommu_init;
937                 x86_platform.iommu_shutdown = intel_iommu_shutdown;
938         }
939
940 #endif
941
942         if (dmar_tbl) {
943                 acpi_put_table(dmar_tbl);
944                 dmar_tbl = NULL;
945         }
946         up_write(&dmar_global_lock);
947 }
948
949 static void unmap_iommu(struct intel_iommu *iommu)
950 {
951         iounmap(iommu->reg);
952         release_mem_region(iommu->reg_phys, iommu->reg_size);
953 }
954
955 /**
956  * map_iommu: map the iommu's registers
957  * @iommu: the iommu to map
958  * @phys_addr: the physical address of the base register
959  *
960  * Memory map the iommu's registers.  Start w/ a single page, and
961  * possibly expand if that turns out to be insufficient.
962  */
963 static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
964 {
965         int map_size, err=0;
966
967         iommu->reg_phys = phys_addr;
968         iommu->reg_size = VTD_PAGE_SIZE;
969
970         if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
971                 pr_err("Can't reserve memory\n");
972                 err = -EBUSY;
973                 goto out;
974         }
975
976         iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
977         if (!iommu->reg) {
978                 pr_err("Can't map the region\n");
979                 err = -ENOMEM;
980                 goto release;
981         }
982
983         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
984         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
985
986         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
987                 err = -EINVAL;
988                 warn_invalid_dmar(phys_addr, " returns all ones");
989                 goto unmap;
990         }
991         if (ecap_vcs(iommu->ecap))
992                 iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
993
994         /* the registers might be more than one page */
995         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
996                          cap_max_fault_reg_offset(iommu->cap));
997         map_size = VTD_PAGE_ALIGN(map_size);
998         if (map_size > iommu->reg_size) {
999                 iounmap(iommu->reg);
1000                 release_mem_region(iommu->reg_phys, iommu->reg_size);
1001                 iommu->reg_size = map_size;
1002                 if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1003                                         iommu->name)) {
1004                         pr_err("Can't reserve memory\n");
1005                         err = -EBUSY;
1006                         goto out;
1007                 }
1008                 iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1009                 if (!iommu->reg) {
1010                         pr_err("Can't map the region\n");
1011                         err = -ENOMEM;
1012                         goto release;
1013                 }
1014         }
1015         err = 0;
1016         goto out;
1017
1018 unmap:
1019         iounmap(iommu->reg);
1020 release:
1021         release_mem_region(iommu->reg_phys, iommu->reg_size);
1022 out:
1023         return err;
1024 }
1025
1026 static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1027 {
1028         iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1029                                             DMAR_UNITS_SUPPORTED);
1030         if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1031                 iommu->seq_id = -1;
1032         } else {
1033                 set_bit(iommu->seq_id, dmar_seq_ids);
1034                 sprintf(iommu->name, "dmar%d", iommu->seq_id);
1035         }
1036
1037         return iommu->seq_id;
1038 }
1039
1040 static void dmar_free_seq_id(struct intel_iommu *iommu)
1041 {
1042         if (iommu->seq_id >= 0) {
1043                 clear_bit(iommu->seq_id, dmar_seq_ids);
1044                 iommu->seq_id = -1;
1045         }
1046 }
1047
1048 static int alloc_iommu(struct dmar_drhd_unit *drhd)
1049 {
1050         struct intel_iommu *iommu;
1051         u32 ver, sts;
1052         int agaw = -1;
1053         int msagaw = -1;
1054         int err;
1055
1056         if (!drhd->reg_base_addr) {
1057                 warn_invalid_dmar(0, "");
1058                 return -EINVAL;
1059         }
1060
1061         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1062         if (!iommu)
1063                 return -ENOMEM;
1064
1065         if (dmar_alloc_seq_id(iommu) < 0) {
1066                 pr_err("Failed to allocate seq_id\n");
1067                 err = -ENOSPC;
1068                 goto error;
1069         }
1070
1071         err = map_iommu(iommu, drhd->reg_base_addr);
1072         if (err) {
1073                 pr_err("Failed to map %s\n", iommu->name);
1074                 goto error_free_seq_id;
1075         }
1076
1077         err = -EINVAL;
1078         if (cap_sagaw(iommu->cap) == 0) {
1079                 pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1080                         iommu->name);
1081                 drhd->ignored = 1;
1082         }
1083
1084         if (!drhd->ignored) {
1085                 agaw = iommu_calculate_agaw(iommu);
1086                 if (agaw < 0) {
1087                         pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1088                                iommu->seq_id);
1089                         drhd->ignored = 1;
1090                 }
1091         }
1092         if (!drhd->ignored) {
1093                 msagaw = iommu_calculate_max_sagaw(iommu);
1094                 if (msagaw < 0) {
1095                         pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1096                                iommu->seq_id);
1097                         drhd->ignored = 1;
1098                         agaw = -1;
1099                 }
1100         }
1101         iommu->agaw = agaw;
1102         iommu->msagaw = msagaw;
1103         iommu->segment = drhd->segment;
1104
1105         iommu->node = NUMA_NO_NODE;
1106
1107         ver = readl(iommu->reg + DMAR_VER_REG);
1108         pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1109                 iommu->name,
1110                 (unsigned long long)drhd->reg_base_addr,
1111                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1112                 (unsigned long long)iommu->cap,
1113                 (unsigned long long)iommu->ecap);
1114
1115         /* Reflect status in gcmd */
1116         sts = readl(iommu->reg + DMAR_GSTS_REG);
1117         if (sts & DMA_GSTS_IRES)
1118                 iommu->gcmd |= DMA_GCMD_IRE;
1119         if (sts & DMA_GSTS_TES)
1120                 iommu->gcmd |= DMA_GCMD_TE;
1121         if (sts & DMA_GSTS_QIES)
1122                 iommu->gcmd |= DMA_GCMD_QIE;
1123
1124         raw_spin_lock_init(&iommu->register_lock);
1125
1126         /*
1127          * This is only for hotplug; at boot time intel_iommu_enabled won't
1128          * be set yet. When intel_iommu_init() runs, it registers the units
1129          * present at boot time, then sets intel_iommu_enabled.
1130          */
1131         if (intel_iommu_enabled && !drhd->ignored) {
1132                 err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1133                                              intel_iommu_groups,
1134                                              "%s", iommu->name);
1135                 if (err)
1136                         goto err_unmap;
1137
1138                 err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
1139                 if (err)
1140                         goto err_sysfs;
1141         }
1142
1143         drhd->iommu = iommu;
1144         iommu->drhd = drhd;
1145
1146         return 0;
1147
1148 err_sysfs:
1149         iommu_device_sysfs_remove(&iommu->iommu);
1150 err_unmap:
1151         unmap_iommu(iommu);
1152 error_free_seq_id:
1153         dmar_free_seq_id(iommu);
1154 error:
1155         kfree(iommu);
1156         return err;
1157 }
1158
1159 static void free_iommu(struct intel_iommu *iommu)
1160 {
1161         if (intel_iommu_enabled && !iommu->drhd->ignored) {
1162                 iommu_device_unregister(&iommu->iommu);
1163                 iommu_device_sysfs_remove(&iommu->iommu);
1164         }
1165
1166         if (iommu->irq) {
1167                 if (iommu->pr_irq) {
1168                         free_irq(iommu->pr_irq, iommu);
1169                         dmar_free_hwirq(iommu->pr_irq);
1170                         iommu->pr_irq = 0;
1171                 }
1172                 free_irq(iommu->irq, iommu);
1173                 dmar_free_hwirq(iommu->irq);
1174                 iommu->irq = 0;
1175         }
1176
1177         if (iommu->qi) {
1178                 free_page((unsigned long)iommu->qi->desc);
1179                 kfree(iommu->qi->desc_status);
1180                 kfree(iommu->qi);
1181         }
1182
1183         if (iommu->reg)
1184                 unmap_iommu(iommu);
1185
1186         dmar_free_seq_id(iommu);
1187         kfree(iommu);
1188 }
1189
1190 /*
1191  * Reclaim all the submitted descriptors which have completed their work.
1192  */
1193 static inline void reclaim_free_desc(struct q_inval *qi)
1194 {
1195         while (qi->desc_status[qi->free_tail] == QI_DONE ||
1196                qi->desc_status[qi->free_tail] == QI_ABORT) {
1197                 qi->desc_status[qi->free_tail] = QI_FREE;
1198                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1199                 qi->free_cnt++;
1200         }
1201 }
1202
1203 static const char *qi_type_string(u8 type)
1204 {
1205         switch (type) {
1206         case QI_CC_TYPE:
1207                 return "Context-cache Invalidation";
1208         case QI_IOTLB_TYPE:
1209                 return "IOTLB Invalidation";
1210         case QI_DIOTLB_TYPE:
1211                 return "Device-TLB Invalidation";
1212         case QI_IEC_TYPE:
1213                 return "Interrupt Entry Cache Invalidation";
1214         case QI_IWD_TYPE:
1215                 return "Invalidation Wait";
1216         case QI_EIOTLB_TYPE:
1217                 return "PASID-based IOTLB Invalidation";
1218         case QI_PC_TYPE:
1219                 return "PASID-cache Invalidation";
1220         case QI_DEIOTLB_TYPE:
1221                 return "PASID-based Device-TLB Invalidation";
1222         case QI_PGRP_RESP_TYPE:
1223                 return "Page Group Response";
1224         default:
1225                 return "UNKNOWN";
1226         }
1227 }
1228
1229 static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1230 {
1231         unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1232         u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1233         struct qi_desc *desc = iommu->qi->desc + head;
1234
1235         if (fault & DMA_FSTS_IQE)
1236                 pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1237                        DMAR_IQER_REG_IQEI(iqe_err));
1238         if (fault & DMA_FSTS_ITE)
1239                 pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1240                        DMAR_IQER_REG_ITESID(iqe_err));
1241         if (fault & DMA_FSTS_ICE)
1242                 pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1243                        DMAR_IQER_REG_ICESID(iqe_err));
1244
1245         pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1246                qi_type_string(desc->qw0 & 0xf),
1247                (unsigned long long)desc->qw0,
1248                (unsigned long long)desc->qw1);
1249
1250         head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1251         head <<= qi_shift(iommu);
1252         desc = iommu->qi->desc + head;
1253
1254         pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1255                qi_type_string(desc->qw0 & 0xf),
1256                (unsigned long long)desc->qw0,
1257                (unsigned long long)desc->qw1);
1258 }
1259
1260 static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1261 {
1262         u32 fault;
1263         int head, tail;
1264         struct q_inval *qi = iommu->qi;
1265         int shift = qi_shift(iommu);
1266
1267         if (qi->desc_status[wait_index] == QI_ABORT)
1268                 return -EAGAIN;
1269
1270         fault = readl(iommu->reg + DMAR_FSTS_REG);
1271         if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
1272                 qi_dump_fault(iommu, fault);
1273
1274         /*
1275          * If IQE happens, the head points to the descriptor associated
1276          * with the error. No new descriptors are fetched until the IQE
1277          * is cleared.
1278          */
1279         if (fault & DMA_FSTS_IQE) {
1280                 head = readl(iommu->reg + DMAR_IQH_REG);
1281                 if ((head >> shift) == index) {
1282                         struct qi_desc *desc = qi->desc + head;
1283
1284                         /*
1285                          * desc->qw2 and desc->qw3 are either reserved or
1286                          * used by software as private data. We won't print
1287                          * out these two qw's for security consideration.
1288                          */
1289                         memcpy(desc, qi->desc + (wait_index << shift),
1290                                1 << shift);
1291                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1292                         pr_info("Invalidation Queue Error (IQE) cleared\n");
1293                         return -EINVAL;
1294                 }
1295         }
1296
1297         /*
1298          * If ITE happens, all pending wait_desc commands are aborted.
1299          * No new descriptors are fetched until the ITE is cleared.
1300          */
1301         if (fault & DMA_FSTS_ITE) {
1302                 head = readl(iommu->reg + DMAR_IQH_REG);
1303                 head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1304                 head |= 1;
1305                 tail = readl(iommu->reg + DMAR_IQT_REG);
1306                 tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1307
1308                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1309                 pr_info("Invalidation Time-out Error (ITE) cleared\n");
1310
1311                 do {
1312                         if (qi->desc_status[head] == QI_IN_USE)
1313                                 qi->desc_status[head] = QI_ABORT;
1314                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1315                 } while (head != tail);
1316
1317                 if (qi->desc_status[wait_index] == QI_ABORT)
1318                         return -EAGAIN;
1319         }
1320
1321         if (fault & DMA_FSTS_ICE) {
1322                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1323                 pr_info("Invalidation Completion Error (ICE) cleared\n");
1324         }
1325
1326         return 0;
1327 }
1328
1329 /*
1330  * Function to submit invalidation descriptors of all types to the queued
1331  * invalidation interface (QI). Multiple descriptors can be submitted at a
1332  * time; a wait descriptor is appended to each submission to ensure that
1333  * hardware has completed the invalidation before returning. Wait descriptors
1334  * can be part of the submission, but they will not be polled for completion.
1335  */
1336 int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1337                    unsigned int count, unsigned long options)
1338 {
1339         struct q_inval *qi = iommu->qi;
1340         s64 devtlb_start_ktime = 0;
1341         s64 iotlb_start_ktime = 0;
1342         s64 iec_start_ktime = 0;
1343         struct qi_desc wait_desc;
1344         int wait_index, index;
1345         unsigned long flags;
1346         int offset, shift;
1347         int rc, i;
1348         u64 type;
1349
1350         if (!qi)
1351                 return 0;
1352
1353         type = desc->qw0 & GENMASK_ULL(3, 0);
1354
1355         if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
1356             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
1357                 iotlb_start_ktime = ktime_to_ns(ktime_get());
1358
1359         if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
1360             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
1361                 devtlb_start_ktime = ktime_to_ns(ktime_get());
1362
1363         if (type == QI_IEC_TYPE &&
1364             dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
1365                 iec_start_ktime = ktime_to_ns(ktime_get());
1366
1367 restart:
1368         rc = 0;
1369
1370         raw_spin_lock_irqsave(&qi->q_lock, flags);
1371         /*
1372          * Check if we have enough empty slots in the queue to submit;
1373          * the calculation is based on:
1374          * # of desc + 1 wait desc + 1 space between head and tail
1375          */
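        /*
         * Worked example (illustrative, not part of the original comment):
         * for a single descriptor, count == 1, so the loop below spins until
         * free_cnt >= 3: one slot for the descriptor itself, one for the wait
         * descriptor appended further down, and one slot left empty so that a
         * full queue can be told apart from an empty one.
         */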
1376         while (qi->free_cnt < count + 2) {
1377                 raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1378                 cpu_relax();
1379                 raw_spin_lock_irqsave(&qi->q_lock, flags);
1380         }
1381
1382         index = qi->free_head;
1383         wait_index = (index + count) % QI_LENGTH;
1384         shift = qi_shift(iommu);
1385
1386         for (i = 0; i < count; i++) {
1387                 offset = ((index + i) % QI_LENGTH) << shift;
1388                 memcpy(qi->desc + offset, &desc[i], 1 << shift);
1389                 qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1390                 trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
1391                                 desc[i].qw2, desc[i].qw3);
1392         }
1393         qi->desc_status[wait_index] = QI_IN_USE;
1394
1395         wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1396                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1397         if (options & QI_OPT_WAIT_DRAIN)
1398                 wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1399         wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1400         wait_desc.qw2 = 0;
1401         wait_desc.qw3 = 0;
1402
1403         offset = wait_index << shift;
1404         memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1405
1406         qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1407         qi->free_cnt -= count + 1;
1408
1409         /*
1410          * update the HW tail register indicating the presence of
1411          * new descriptors.
1412          */
1413         writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1414
1415         while (qi->desc_status[wait_index] != QI_DONE) {
1416                 /*
1417                  * We will leave the interrupts disabled, to prevent interrupt
1418                  * context from queueing another cmd while a cmd is already
1419                  * submitted and waiting for completion on this CPU. This is to avoid
1420                  * a deadlock where the interrupt context can wait indefinitely
1421                  * for free slots in the queue.
1422                  */
1423                 rc = qi_check_fault(iommu, index, wait_index);
1424                 if (rc)
1425                         break;
1426
1427                 raw_spin_unlock(&qi->q_lock);
1428                 cpu_relax();
1429                 raw_spin_lock(&qi->q_lock);
1430         }
1431
1432         for (i = 0; i < count; i++)
1433                 qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1434
1435         reclaim_free_desc(qi);
1436         raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1437
1438         if (rc == -EAGAIN)
1439                 goto restart;
1440
1441         if (iotlb_start_ktime)
1442                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
1443                                 ktime_to_ns(ktime_get()) - iotlb_start_ktime);
1444
1445         if (devtlb_start_ktime)
1446                 dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
1447                                 ktime_to_ns(ktime_get()) - devtlb_start_ktime);
1448
1449         if (iec_start_ktime)
1450                 dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
1451                                 ktime_to_ns(ktime_get()) - iec_start_ktime);
1452
1453         return rc;
1454 }
1455
1456 /*
1457  * Flush the global interrupt entry cache.
1458  */
1459 void qi_global_iec(struct intel_iommu *iommu)
1460 {
1461         struct qi_desc desc;
1462
1463         desc.qw0 = QI_IEC_TYPE;
1464         desc.qw1 = 0;
1465         desc.qw2 = 0;
1466         desc.qw3 = 0;
1467
1468         /* should never fail */
1469         qi_submit_sync(iommu, &desc, 1, 0);
1470 }
1471
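/* Flush the context cache for (@did, @sid) with the granularity given by @type. */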
1472 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1473                       u64 type)
1474 {
1475         struct qi_desc desc;
1476
1477         desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1478                         | QI_CC_GRAN(type) | QI_CC_TYPE;
1479         desc.qw1 = 0;
1480         desc.qw2 = 0;
1481         desc.qw3 = 0;
1482
1483         qi_submit_sync(iommu, &desc, 1, 0);
1484 }
1485
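/*
 * Flush the IOTLB for domain @did; @size_order is log2 of the number of
 * 4KB pages covered, and read/write draining is requested when the
 * hardware advertises the corresponding capabilities.
 */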
1486 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1487                     unsigned int size_order, u64 type)
1488 {
1489         u8 dw = 0, dr = 0;
1490
1491         struct qi_desc desc;
1492         int ih = 0;
1493
1494         if (cap_write_drain(iommu->cap))
1495                 dw = 1;
1496
1497         if (cap_read_drain(iommu->cap))
1498                 dr = 1;
1499
1500         desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1501                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1502         desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1503                 | QI_IOTLB_AM(size_order);
1504         desc.qw2 = 0;
1505         desc.qw3 = 0;
1506
1507         qi_submit_sync(iommu, &desc, 1, 0);
1508 }
1509
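/*
 * Flush the device IOTLB (ATS TLB) of device @sid; a non-zero @mask
 * requests a 2^mask page range by setting the S bit and encoding the
 * size in the low address bits, and @qdep is the device's invalidation
 * queue depth.
 */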
1510 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1511                         u16 qdep, u64 addr, unsigned mask)
1512 {
1513         struct qi_desc desc;
1514
1515         if (mask) {
1516                 addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1517                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1518         } else
1519                 desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1520
1521         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1522                 qdep = 0;
1523
1524         desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1525                    QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1526         desc.qw2 = 0;
1527         desc.qw3 = 0;
1528
1529         qi_submit_sync(iommu, &desc, 1, 0);
1530 }
1531
1532 /* PASID-based IOTLB invalidation */
1533 void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1534                      unsigned long npages, bool ih)
1535 {
1536         struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1537
1538         /*
1539          * npages == -1 means a PASID-selective invalidation; a positive
1540          * value means a Page-selective-within-PASID invalidation.
1541          * 0 is not a valid input.
1542          */
1543         if (WARN_ON(!npages)) {
1544                 pr_err("Invalid input npages = %ld\n", npages);
1545                 return;
1546         }
1547
1548         if (npages == -1) {
1549                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1550                                 QI_EIOTLB_DID(did) |
1551                                 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1552                                 QI_EIOTLB_TYPE;
1553                 desc.qw1 = 0;
1554         } else {
1555                 int mask = ilog2(__roundup_pow_of_two(npages));
1556                 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
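                /*
                 * e.g. npages == 16: mask == ilog2(roundup_pow_of_two(16)) == 4,
                 * align == 64KB, and QI_EIOTLB_AM(4) requests a 16-page range.
                 */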
1557
1558                 if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1559                         addr = ALIGN_DOWN(addr, align);
1560
1561                 desc.qw0 = QI_EIOTLB_PASID(pasid) |
1562                                 QI_EIOTLB_DID(did) |
1563                                 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1564                                 QI_EIOTLB_TYPE;
1565                 desc.qw1 = QI_EIOTLB_ADDR(addr) |
1566                                 QI_EIOTLB_IH(ih) |
1567                                 QI_EIOTLB_AM(mask);
1568         }
1569
1570         qi_submit_sync(iommu, &desc, 1, 0);
1571 }
1572
1573 /* PASID-based device IOTLB Invalidate */
1574 void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1575                               u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1576 {
1577         unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1578         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1579
1580         desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1581                 QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1582                 QI_DEV_IOTLB_PFSID(pfsid);
1583
1584         /*
1585          * If the S bit is 0, only a single page is flushed. If the S bit
1586          * is set, the least significant zero bit in the address indicates
1587          * the invalidation range (VT-d spec 6.5.2.6), e.g. a zero at
1588          * address bit 12 indicates 8KB, at bit 13 indicates 16KB.
1589          * size_order == 0 means a single 4KB page (PAGE_SIZE).
1590          * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1591          * ECAP.
1592          */
1593         if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1594                 pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1595                                     addr, size_order);
1596
1597         /* Take page address */
1598         desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1599
1600         if (size_order) {
1601                 /*
1602                  * Any 0 bits already in the address below size_order would
1603                  * be read as the least significant zero bit; set them to 1
1604                  * to avoid indicating a smaller size than desired.
1605                  */
1606                 desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1607                                         VTD_PAGE_SHIFT);
1608                 /* Clear the size_order bit: it becomes the least significant zero bit */
1609                 desc.qw1 &= ~mask;
1610                 /* Set the S bit to indicate flushing more than 1 page */
1611                 desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1612         }
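        /*
         * e.g. size_order == 2: GENMASK sets address bits 13:12, clearing
         * bit 13 (mask) then leaves bit 12 == 1 and bit 13 == 0, so the
         * least significant zero bit encodes a 16KB (four page) range.
         */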
1613
1614         qi_submit_sync(iommu, &desc, 1, 0);
1615 }
1616
1617 void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1618                           u64 granu, u32 pasid)
1619 {
1620         struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1621
1622         desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1623                         QI_PC_GRAN(granu) | QI_PC_TYPE;
1624         qi_submit_sync(iommu, &desc, 1, 0);
1625 }
1626
1627 /*
1628  * Disable Queued Invalidation interface.
1629  */
1630 void dmar_disable_qi(struct intel_iommu *iommu)
1631 {
1632         unsigned long flags;
1633         u32 sts;
1634         cycles_t start_time = get_cycles();
1635
1636         if (!ecap_qis(iommu->ecap))
1637                 return;
1638
1639         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1640
1641         sts = readl(iommu->reg + DMAR_GSTS_REG);
1642         if (!(sts & DMA_GSTS_QIES))
1643                 goto end;
1644
1645         /*
1646          * Give the hardware a chance to complete any pending invalidation requests.
1647          */
1648         while ((readl(iommu->reg + DMAR_IQT_REG) !=
1649                 readl(iommu->reg + DMAR_IQH_REG)) &&
1650                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1651                 cpu_relax();
1652
1653         iommu->gcmd &= ~DMA_GCMD_QIE;
1654         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1655
1656         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1657                       !(sts & DMA_GSTS_QIES), sts);
1658 end:
1659         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1660 }
1661
1662 /*
1663  * Enable queued invalidation.
1664  */
1665 static void __dmar_enable_qi(struct intel_iommu *iommu)
1666 {
1667         u32 sts;
1668         unsigned long flags;
1669         struct q_inval *qi = iommu->qi;
1670         u64 val = virt_to_phys(qi->desc);
1671
1672         qi->free_head = qi->free_tail = 0;
1673         qi->free_cnt = QI_LENGTH;
1674
1675         /*
1676          * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1677          * is present.
1678          */
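        /*
         * DW (bit 11) selects the 256-bit scalable-mode descriptor width
         * and QS == 1 sizes the queue at two 4KB pages, matching the
         * two-page allocation done in dmar_enable_qi().
         */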
1679         if (ecap_smts(iommu->ecap))
1680                 val |= (1 << 11) | 1;
1681
1682         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1683
1684         /* write zero to the tail reg */
1685         writel(0, iommu->reg + DMAR_IQT_REG);
1686
1687         dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1688
1689         iommu->gcmd |= DMA_GCMD_QIE;
1690         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1691
1692         /* Make sure the hardware completes it */
1693         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1694
1695         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1696 }
1697
1698 /*
1699  * Enable Queued Invalidation interface. This is a must to support
1700  * interrupt-remapping. Also used by DMA-remapping, which replaces
1701  * register based IOTLB invalidation.
1702  */
1703 int dmar_enable_qi(struct intel_iommu *iommu)
1704 {
1705         struct q_inval *qi;
1706         struct page *desc_page;
1707
1708         if (!ecap_qis(iommu->ecap))
1709                 return -ENOENT;
1710
1711         /*
1712          * Queued invalidation is already set up and enabled.
1713          */
1714         if (iommu->qi)
1715                 return 0;
1716
1717         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1718         if (!iommu->qi)
1719                 return -ENOMEM;
1720
1721         qi = iommu->qi;
1722
1723         /*
1724          * Need two pages to accommodate 256 descriptors of 256 bits each
1725          * if the remapping hardware supports scalable mode translation.
1726          */
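        /*
         * The allocation order below is !!ecap_smts(): order 1 (two pages,
         * 8KB) for 256 x 256-bit descriptors in scalable mode, order 0
         * (one page) for 256 x 128-bit legacy descriptors.
         */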
1727         desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1728                                      !!ecap_smts(iommu->ecap));
1729         if (!desc_page) {
1730                 kfree(qi);
1731                 iommu->qi = NULL;
1732                 return -ENOMEM;
1733         }
1734
1735         qi->desc = page_address(desc_page);
1736
1737         qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1738         if (!qi->desc_status) {
1739                 free_page((unsigned long) qi->desc);
1740                 kfree(qi);
1741                 iommu->qi = NULL;
1742                 return -ENOMEM;
1743         }
1744
1745         raw_spin_lock_init(&qi->q_lock);
1746
1747         __dmar_enable_qi(iommu);
1748
1749         return 0;
1750 }
1751
1752 /* IOMMU interrupt handling. Most of it is MSI-like. */
1753
1754 enum faulttype {
1755         DMA_REMAP,
1756         INTR_REMAP,
1757         UNKNOWN,
1758 };
1759
1760 static const char *dma_remap_fault_reasons[] =
1761 {
1762         "Software",
1763         "Present bit in root entry is clear",
1764         "Present bit in context entry is clear",
1765         "Invalid context entry",
1766         "Access beyond MGAW",
1767         "PTE Write access is not set",
1768         "PTE Read access is not set",
1769         "Next page table ptr is invalid",
1770         "Root table address invalid",
1771         "Context table ptr is invalid",
1772         "non-zero reserved fields in RTP",
1773         "non-zero reserved fields in CTP",
1774         "non-zero reserved fields in PTE",
1775         "PCE for translation request specifies blocking",
1776 };
1777
1778 static const char * const dma_remap_sm_fault_reasons[] = {
1779         "SM: Invalid Root Table Address",
1780         "SM: TTM 0 for request with PASID",
1781         "SM: TTM 0 for page group request",
1782         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1783         "SM: Error attempting to access Root Entry",
1784         "SM: Present bit in Root Entry is clear",
1785         "SM: Non-zero reserved field set in Root Entry",
1786         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1787         "SM: Error attempting to access Context Entry",
1788         "SM: Present bit in Context Entry is clear",
1789         "SM: Non-zero reserved field set in the Context Entry",
1790         "SM: Invalid Context Entry",
1791         "SM: DTE field in Context Entry is clear",
1792         "SM: PASID Enable field in Context Entry is clear",
1793         "SM: PASID is larger than the max in Context Entry",
1794         "SM: PRE field in Context-Entry is clear",
1795         "SM: RID_PASID field error in Context-Entry",
1796         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1797         "SM: Error attempting to access the PASID Directory Entry",
1798         "SM: Present bit in Directory Entry is clear",
1799         "SM: Non-zero reserved field set in PASID Directory Entry",
1800         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1801         "SM: Error attempting to access PASID Table Entry",
1802         "SM: Present bit in PASID Table Entry is clear",
1803         "SM: Non-zero reserved field set in PASID Table Entry",
1804         "SM: Invalid Scalable-Mode PASID Table Entry",
1805         "SM: ERE field is clear in PASID Table Entry",
1806         "SM: SRE field is clear in PASID Table Entry",
1807         "Unknown", "Unknown",/* 0x5E-0x5F */
1808         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1809         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1810         "SM: Error attempting to access first-level paging entry",
1811         "SM: Present bit in first-level paging entry is clear",
1812         "SM: Non-zero reserved field set in first-level paging entry",
1813         "SM: Error attempting to access FL-PML4 entry",
1814         "SM: First-level entry address beyond MGAW in Nested translation",
1815         "SM: Read permission error in FL-PML4 entry in Nested translation",
1816         "SM: Read permission error in first-level paging entry in Nested translation",
1817         "SM: Write permission error in first-level paging entry in Nested translation",
1818         "SM: Error attempting to access second-level paging entry",
1819         "SM: Read/Write permission error in second-level paging entry",
1820         "SM: Non-zero reserved field set in second-level paging entry",
1821         "SM: Invalid second-level page table pointer",
1822         "SM: A/D bit update needed in second-level entry when set up in no snoop",
1823         "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1824         "SM: Address in first-level translation is not canonical",
1825         "SM: U/S set 0 for first-level translation with user privilege",
1826         "SM: No execute permission for request with PASID and ER=1",
1827         "SM: Address beyond the DMA hardware max",
1828         "SM: Second-level entry address beyond the max",
1829         "SM: No write permission for Write/AtomicOp request",
1830         "SM: No read permission for Read/AtomicOp request",
1831         "SM: Invalid address-interrupt address",
1832         "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1833         "SM: A/D bit update needed in first-level entry when set up in no snoop",
1834 };
1835
1836 static const char *irq_remap_fault_reasons[] =
1837 {
1838         "Detected reserved fields in the decoded interrupt-remapped request",
1839         "Interrupt index exceeded the interrupt-remapping table size",
1840         "Present field in the IRTE entry is clear",
1841         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1842         "Detected reserved fields in the IRTE entry",
1843         "Blocked a compatibility format interrupt request",
1844         "Blocked an interrupt request due to source-id verification failure",
1845 };
1846
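/*
 * Fault reason codes are split into ranges: 0x0-0xD are legacy
 * DMA-remapping faults, 0x20-0x26 are interrupt-remapping faults and
 * 0x30 onwards are scalable-mode DMA-remapping faults; unknown codes
 * fall back to "Unknown".
 */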
1847 static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1848 {
1849         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1850                                         ARRAY_SIZE(irq_remap_fault_reasons))) {
1851                 *fault_type = INTR_REMAP;
1852                 return irq_remap_fault_reasons[fault_reason - 0x20];
1853         } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1854                         ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1855                 *fault_type = DMA_REMAP;
1856                 return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1857         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1858                 *fault_type = DMA_REMAP;
1859                 return dma_remap_fault_reasons[fault_reason];
1860         } else {
1861                 *fault_type = UNKNOWN;
1862                 return "Unknown";
1863         }
1864 }
1865
1866
1867 static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1868 {
1869         if (iommu->irq == irq)
1870                 return DMAR_FECTL_REG;
1871         else if (iommu->pr_irq == irq)
1872                 return DMAR_PECTL_REG;
1873         else
1874                 BUG();
1875 }
1876
1877 void dmar_msi_unmask(struct irq_data *data)
1878 {
1879         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1880         int reg = dmar_msi_reg(iommu, data->irq);
1881         unsigned long flag;
1882
1883         /* unmask it */
1884         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1885         writel(0, iommu->reg + reg);
1886         /* Read back the register to force-flush the posted write */
1887         readl(iommu->reg + reg);
1888         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1889 }
1890
1891 void dmar_msi_mask(struct irq_data *data)
1892 {
1893         struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1894         int reg = dmar_msi_reg(iommu, data->irq);
1895         unsigned long flag;
1896
1897         /* mask it */
1898         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1899         writel(DMA_FECTL_IM, iommu->reg + reg);
1900         /* Read back the register to force-flush the posted write */
1901         readl(iommu->reg + reg);
1902         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1903 }
1904
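/*
 * The fault event registers follow the MSI layout: control at the base
 * offset, data at +4, lower address at +8 and upper address at +12,
 * hence the fixed offsets used below.
 */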
1905 void dmar_msi_write(int irq, struct msi_msg *msg)
1906 {
1907         struct intel_iommu *iommu = irq_get_handler_data(irq);
1908         int reg = dmar_msi_reg(iommu, irq);
1909         unsigned long flag;
1910
1911         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1912         writel(msg->data, iommu->reg + reg + 4);
1913         writel(msg->address_lo, iommu->reg + reg + 8);
1914         writel(msg->address_hi, iommu->reg + reg + 12);
1915         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1916 }
1917
1918 void dmar_msi_read(int irq, struct msi_msg *msg)
1919 {
1920         struct intel_iommu *iommu = irq_get_handler_data(irq);
1921         int reg = dmar_msi_reg(iommu, irq);
1922         unsigned long flag;
1923
1924         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1925         msg->data = readl(iommu->reg + reg + 4);
1926         msg->address_lo = readl(iommu->reg + reg + 8);
1927         msg->address_hi = readl(iommu->reg + reg + 12);
1928         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1929 }
1930
1931 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1932                 u8 fault_reason, u32 pasid, u16 source_id,
1933                 unsigned long long addr)
1934 {
1935         const char *reason;
1936         int fault_type;
1937
1938         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1939
1940         if (fault_type == INTR_REMAP) {
1941                 pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
1942                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1943                        PCI_FUNC(source_id & 0xFF), addr >> 48,
1944                        fault_reason, reason);
1945
1946                 return 0;
1947         }
1948
1949         if (pasid == INVALID_IOASID)
1950                 pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1951                        type ? "DMA Read" : "DMA Write",
1952                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1953                        PCI_FUNC(source_id & 0xFF), addr,
1954                        fault_reason, reason);
1955         else
1956                 pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1957                        type ? "DMA Read" : "DMA Write", pasid,
1958                        source_id >> 8, PCI_SLOT(source_id & 0xFF),
1959                        PCI_FUNC(source_id & 0xFF), addr,
1960                        fault_reason, reason);
1961
1962         dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
1963
1964         return 0;
1965 }
1966
1967 #define PRIMARY_FAULT_REG_LEN (16)
1968 irqreturn_t dmar_fault(int irq, void *dev_id)
1969 {
1970         struct intel_iommu *iommu = dev_id;
1971         int reg, fault_index;
1972         u32 fault_status;
1973         unsigned long flag;
1974         static DEFINE_RATELIMIT_STATE(rs,
1975                                       DEFAULT_RATELIMIT_INTERVAL,
1976                                       DEFAULT_RATELIMIT_BURST);
1977
1978         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1979         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1980         if (fault_status && __ratelimit(&rs))
1981                 pr_err("DRHD: handling fault status reg %x\n", fault_status);
1982
1983         /* TBD: ignore advanced fault log currently */
1984         if (!(fault_status & DMA_FSTS_PPF))
1985                 goto unlock_exit;
1986
1987         fault_index = dma_fsts_fault_record_index(fault_status);
1988         reg = cap_fault_reg_offset(iommu->cap);
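        /*
         * Each primary fault record is 16 bytes: the low quadword holds
         * the faulting page address, bytes 8-11 the source-id and the
         * PASID-present bit, and bytes 12-15 the F bit, fault reason,
         * request type and PASID value; hence the reads at offsets 0, 8
         * and 12 below.
         */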
1989         while (1) {
1990                 /* Disable printing, simply clear the fault when ratelimited */
1991                 bool ratelimited = !__ratelimit(&rs);
1992                 u8 fault_reason;
1993                 u16 source_id;
1994                 u64 guest_addr;
1995                 u32 pasid;
1996                 int type;
1997                 u32 data;
1998                 bool pasid_present;
1999
2000                 /* highest 32 bits */
2001                 data = readl(iommu->reg + reg +
2002                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
2003                 if (!(data & DMA_FRCD_F))
2004                         break;
2005
2006                 if (!ratelimited) {
2007                         fault_reason = dma_frcd_fault_reason(data);
2008                         type = dma_frcd_type(data);
2009
2010                         pasid = dma_frcd_pasid_value(data);
2011                         data = readl(iommu->reg + reg +
2012                                      fault_index * PRIMARY_FAULT_REG_LEN + 8);
2013                         source_id = dma_frcd_source_id(data);
2014
2015                         pasid_present = dma_frcd_pasid_present(data);
2016                         guest_addr = dmar_readq(iommu->reg + reg +
2017                                         fault_index * PRIMARY_FAULT_REG_LEN);
2018                         guest_addr = dma_frcd_page_addr(guest_addr);
2019                 }
2020
2021                 /* clear the fault */
2022                 writel(DMA_FRCD_F, iommu->reg + reg +
2023                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
2024
2025                 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2026
2027                 if (!ratelimited)
2028                         /* Use PASID -1 (INVALID_IOASID) if no PASID is present */
2029                         dmar_fault_do_one(iommu, type, fault_reason,
2030                                           pasid_present ? pasid : INVALID_IOASID,
2031                                           source_id, guest_addr);
2032
2033                 fault_index++;
2034                 if (fault_index >= cap_num_fault_regs(iommu->cap))
2035                         fault_index = 0;
2036                 raw_spin_lock_irqsave(&iommu->register_lock, flag);
2037         }
2038
2039         writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
2040                iommu->reg + DMAR_FSTS_REG);
2041
2042 unlock_exit:
2043         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2044         return IRQ_HANDLED;
2045 }
2046
2047 int dmar_set_interrupt(struct intel_iommu *iommu)
2048 {
2049         int irq, ret;
2050
2051         /*
2052          * Check if the fault interrupt is already initialized.
2053          */
2054         if (iommu->irq)
2055                 return 0;
2056
2057         irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2058         if (irq > 0) {
2059                 iommu->irq = irq;
2060         } else {
2061                 pr_err("No free IRQ vectors\n");
2062                 return -EINVAL;
2063         }
2064
2065         ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2066         if (ret)
2067                 pr_err("Can't request irq\n");
2068         return ret;
2069 }
2070
2071 int __init enable_drhd_fault_handling(void)
2072 {
2073         struct dmar_drhd_unit *drhd;
2074         struct intel_iommu *iommu;
2075
2076         /*
2077          * Enable fault control interrupt.
2078          */
2079         for_each_iommu(iommu, drhd) {
2080                 u32 fault_status;
2081                 int ret = dmar_set_interrupt(iommu);
2082
2083                 if (ret) {
2084                         pr_err("DRHD %Lx: failed to enable fault, interrupt, ret %d\n",
2085                                (unsigned long long)drhd->reg_base_addr, ret);
2086                         return -1;
2087                 }
2088
2089                 /*
2090                  * Clear any previous faults.
2091                  */
2092                 dmar_fault(iommu->irq, iommu);
2093                 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2094                 writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2095         }
2096
2097         return 0;
2098 }
2099
2100 /*
2101  * Re-enable Queued Invalidation interface.
2102  */
2103 int dmar_reenable_qi(struct intel_iommu *iommu)
2104 {
2105         if (!ecap_qis(iommu->ecap))
2106                 return -ENOENT;
2107
2108         if (!iommu->qi)
2109                 return -ENOENT;
2110
2111         /*
2112          * First disable queued invalidation.
2113          */
2114         dmar_disable_qi(iommu);
2115         /*
2116          * Then enable queued invalidation again. Since there is no pending
2117          * invalidation requests now, it's safe to re-enable queued
2118          * invalidation.
2119          */
2120         __dmar_enable_qi(iommu);
2121
2122         return 0;
2123 }
2124
2125 /*
2126  * Check interrupt remapping support in DMAR table description.
2127  */
2128 int __init dmar_ir_support(void)
2129 {
2130         struct acpi_table_dmar *dmar;
2131         dmar = (struct acpi_table_dmar *)dmar_tbl;
2132         if (!dmar)
2133                 return 0;
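        /* Bit 0 of the DMAR table flags indicates interrupt remapping support. */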
2134         return dmar->flags & 0x1;
2135 }
2136
2137 /* Check whether DMAR units are in use */
2138 static inline bool dmar_in_use(void)
2139 {
2140         return irq_remapping_enabled || intel_iommu_enabled;
2141 }
2142
2143 static int __init dmar_free_unused_resources(void)
2144 {
2145         struct dmar_drhd_unit *dmaru, *dmaru_n;
2146
2147         if (dmar_in_use())
2148                 return 0;
2149
2150         if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2151                 bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2152
2153         down_write(&dmar_global_lock);
2154         list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2155                 list_del(&dmaru->list);
2156                 dmar_free_drhd(dmaru);
2157         }
2158         up_write(&dmar_global_lock);
2159
2160         return 0;
2161 }
2162
2163 late_initcall(dmar_free_unused_resources);
2164
2165 /*
2166  * DMAR Hotplug Support
2167  * For more details, please refer to Intel(R) Virtualization Technology
2168  * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
2169  * "Remapping Hardware Unit Hot Plug".
2170  */
2171 static guid_t dmar_hp_guid =
2172         GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2173                   0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2174
2175 /*
2176  * Currently there's only one revision and BIOS will not check the revision id,
2177  * so use 0 for safety.
2178  */
2179 #define DMAR_DSM_REV_ID                 0
2180 #define DMAR_DSM_FUNC_DRHD              1
2181 #define DMAR_DSM_FUNC_ATSR              2
2182 #define DMAR_DSM_FUNC_RHSA              3
2183 #define DMAR_DSM_FUNC_SATC              4
2184
2185 static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2186 {
2187         return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2188 }
2189
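/*
 * Evaluate DMAR hotplug _DSM function @func on @handle and walk the
 * remapping structures in the returned buffer with @handler; returns 0
 * when the _DSM function is not implemented by the firmware.
 */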
2190 static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2191                                   dmar_res_handler_t handler, void *arg)
2192 {
2193         int ret = -ENODEV;
2194         union acpi_object *obj;
2195         struct acpi_dmar_header *start;
2196         struct dmar_res_callback callback;
2197         static int res_type[] = {
2198                 [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2199                 [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2200                 [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2201                 [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2202         };
2203
2204         if (!dmar_detect_dsm(handle, func))
2205                 return 0;
2206
2207         obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2208                                       func, NULL, ACPI_TYPE_BUFFER);
2209         if (!obj)
2210                 return -ENODEV;
2211
2212         memset(&callback, 0, sizeof(callback));
2213         callback.cb[res_type[func]] = handler;
2214         callback.arg[res_type[func]] = arg;
2215         start = (struct acpi_dmar_header *)obj->buffer.pointer;
2216         ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2217
2218         ACPI_FREE(obj);
2219
2220         return ret;
2221 }
2222
2223 static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2224 {
2225         int ret;
2226         struct dmar_drhd_unit *dmaru;
2227
2228         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2229         if (!dmaru)
2230                 return -ENODEV;
2231
2232         ret = dmar_ir_hotplug(dmaru, true);
2233         if (ret == 0)
2234                 ret = dmar_iommu_hotplug(dmaru, true);
2235
2236         return ret;
2237 }
2238
2239 static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2240 {
2241         int i, ret;
2242         struct device *dev;
2243         struct dmar_drhd_unit *dmaru;
2244
2245         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2246         if (!dmaru)
2247                 return 0;
2248
2249         /*
2250          * All PCI devices managed by this unit should have been destroyed.
2251          */
2252         if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2253                 for_each_active_dev_scope(dmaru->devices,
2254                                           dmaru->devices_cnt, i, dev)
2255                         return -EBUSY;
2256         }
2257
2258         ret = dmar_ir_hotplug(dmaru, false);
2259         if (ret == 0)
2260                 ret = dmar_iommu_hotplug(dmaru, false);
2261
2262         return ret;
2263 }
2264
2265 static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2266 {
2267         struct dmar_drhd_unit *dmaru;
2268
2269         dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2270         if (dmaru) {
2271                 list_del_rcu(&dmaru->list);
2272                 synchronize_rcu();
2273                 dmar_free_drhd(dmaru);
2274         }
2275
2276         return 0;
2277 }
2278
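/*
 * Hot-add path: validate and parse the DRHD, RHSA and ATSR structures
 * reported by the _DSM method, then activate interrupt remapping and
 * DMA remapping on the new units; on failure, roll back in reverse order.
 */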
2279 static int dmar_hotplug_insert(acpi_handle handle)
2280 {
2281         int ret;
2282         int drhd_count = 0;
2283
2284         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2285                                      &dmar_validate_one_drhd, (void *)1);
2286         if (ret)
2287                 goto out;
2288
2289         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2290                                      &dmar_parse_one_drhd, (void *)&drhd_count);
2291         if (ret == 0 && drhd_count == 0) {
2292                 pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2293                 goto out;
2294         } else if (ret) {
2295                 goto release_drhd;
2296         }
2297
2298         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2299                                      &dmar_parse_one_rhsa, NULL);
2300         if (ret)
2301                 goto release_drhd;
2302
2303         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2304                                      &dmar_parse_one_atsr, NULL);
2305         if (ret)
2306                 goto release_atsr;
2307
2308         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2309                                      &dmar_hp_add_drhd, NULL);
2310         if (!ret)
2311                 return 0;
2312
2313         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2314                                &dmar_hp_remove_drhd, NULL);
2315 release_atsr:
2316         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2317                                &dmar_release_one_atsr, NULL);
2318 release_drhd:
2319         dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2320                                &dmar_hp_release_drhd, NULL);
2321 out:
2322         return ret;
2323 }
2324
2325 static int dmar_hotplug_remove(acpi_handle handle)
2326 {
2327         int ret;
2328
2329         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2330                                      &dmar_check_one_atsr, NULL);
2331         if (ret)
2332                 return ret;
2333
2334         ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2335                                      &dmar_hp_remove_drhd, NULL);
2336         if (ret == 0) {
2337                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2338                                                &dmar_release_one_atsr, NULL));
2339                 WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2340                                                &dmar_hp_release_drhd, NULL));
2341         } else {
2342                 dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2343                                        &dmar_hp_add_drhd, NULL);
2344         }
2345
2346         return ret;
2347 }
2348
2349 static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2350                                        void *context, void **retval)
2351 {
2352         acpi_handle *phdl = retval;
2353
2354         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2355                 *phdl = handle;
2356                 return AE_CTRL_TERMINATE;
2357         }
2358
2359         return AE_OK;
2360 }
2361
2362 static int dmar_device_hotplug(acpi_handle handle, bool insert)
2363 {
2364         int ret;
2365         acpi_handle tmp = NULL;
2366         acpi_status status;
2367
2368         if (!dmar_in_use())
2369                 return 0;
2370
2371         if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2372                 tmp = handle;
2373         } else {
2374                 status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2375                                              ACPI_UINT32_MAX,
2376                                              dmar_get_dsm_handle,
2377                                              NULL, NULL, &tmp);
2378                 if (ACPI_FAILURE(status)) {
2379                         pr_warn("Failed to locate _DSM method.\n");
2380                         return -ENXIO;
2381                 }
2382         }
2383         if (tmp == NULL)
2384                 return 0;
2385
2386         down_write(&dmar_global_lock);
2387         if (insert)
2388                 ret = dmar_hotplug_insert(tmp);
2389         else
2390                 ret = dmar_hotplug_remove(tmp);
2391         up_write(&dmar_global_lock);
2392
2393         return ret;
2394 }
2395
2396 int dmar_device_add(acpi_handle handle)
2397 {
2398         return dmar_device_hotplug(handle, true);
2399 }
2400
2401 int dmar_device_remove(acpi_handle handle)
2402 {
2403         return dmar_device_hotplug(handle, false);
2404 }
2405
2406 /*
2407  * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2408  *
2409  * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2410  * the ACPI DMAR table. This means that the platform boot firmware has made
2411  * sure no device can issue DMA outside of RMRR regions.
2412  */
2413 bool dmar_platform_optin(void)
2414 {
2415         struct acpi_table_dmar *dmar;
2416         acpi_status status;
2417         bool ret;
2418
2419         status = acpi_get_table(ACPI_SIG_DMAR, 0,
2420                                 (struct acpi_table_header **)&dmar);
2421         if (ACPI_FAILURE(status))
2422                 return false;
2423
2424         ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2425         acpi_put_table((struct acpi_table_header *)dmar);
2426
2427         return ret;
2428 }
2429 EXPORT_SYMBOL_GPL(dmar_platform_optin);